Set path Laura: ONLY USE FOR LAURA

# Directory that holds the input CSV files. The default is Laura's local
# checkout; set the environment variable RISK_WVS_DATA_DIR to point the script
# at another location without editing this file.
# base_path <- "//home.kt.ktzh.ch/B117T23$/Desktop/Riskktaking/Data"
base_path <- Sys.getenv(
  "RISK_WVS_DATA_DIR",
  unset = "/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/Data_S3"
)

Library

library(tidyverse)
library(ggplot2)
library(specr)
library(specr)
library(readxl)
library(ggthemes)
library(cowplot)
library(dplyr)
library(knitr)
library(kableExtra)
library(Hmisc)
remotes::install_github("masurp/specr")
library(ggplot2)
library(cowplot)

Load all data

# Combined GPS / WVS data. `isocode` is renamed to `COUNTRY`, the key used for
# the joins below, and three columns are dropped.
risktaking <- read.csv(
  file.path(base_path, "Specification_curve.csv"),
  header = TRUE,
  as.is = TRUE
) %>%
  rename(COUNTRY = isocode) %>%
  select(-hardship_index, -worldmap, -source)

# Hardship indicator tables, one per domain. The columns that are not needed
# (`country`, `avg_risktaking`) are removed before merging.
hardship_hs <- read.csv(file.path(base_path, "hardship_HS.csv")) %>%
  select(-country, -avg_risktaking)
hardship_finance <- read.csv(file.path(base_path, "hardship_finance.csv")) %>%
  select(-country, -avg_risktaking)
hardship_crime <- read.csv(file.path(base_path, "hardship_crime.csv")) %>%
  select(-country, -avg_risktaking)
hardship_environment <- read.csv(file.path(base_path, "hardship_environment.csv")) %>%
  select(-country, -avg_risktaking)

# Merge: left-join every hardship table onto the respondent data, in the order
# health/safety, finance, crime, environment.
hardship_combined <- Reduce(
  function(merged, hardship_table) left_join(merged, hardship_table, by = "COUNTRY"),
  list(hardship_hs, hardship_finance, hardship_crime, hardship_environment),
  risktaking
)

# str(hardship_combined)
head(hardship_combined)
##   country COUNTRY gender  age_scale age risktaking HS_alc_tax_wine
## 1  Turkey     TUR      1 -0.9021528  26   60.19755        0.205358
## 2  Turkey     TUR      1  0.4737750  50   53.86746        0.205358
## 3  Turkey     TUR      1 -1.1888045  21   60.19755        0.205358
## 4  Turkey     TUR      0 -1.0168135  24   62.71073        0.205358
## 5  Turkey     TUR      0 -1.0168135  24   61.22168        0.205358
## 6  Turkey     TUR      0 -1.3034651  19   69.63037        0.205358
##   HS_alc_roaddeath HS_drg_treatment HS_nic_affordability HS_mh_policy
## 1        -0.962363      -0.08607804           0.03046158            0
## 2        -0.962363      -0.08607804           0.03046158            0
## 3        -0.962363      -0.08607804           0.03046158            0
## 4        -0.962363      -0.08607804           0.03046158            0
## 5        -0.962363      -0.08607804           0.03046158            0
## 6        -0.962363      -0.08607804           0.03046158            0
##   HS_sex_gini HS_oth_obesity HS_oth_cleancooking HS_mh_mhhospit
## 1  0.08475973     -0.9201102          -0.4749541      -1.073742
## 2  0.08475973     -0.9201102          -0.4749541      -1.073742
## 3  0.08475973     -0.9201102          -0.4749541      -1.073742
## 4  0.08475973     -0.9201102          -0.4749541      -1.073742
## 5  0.08475973     -0.9201102          -0.4749541      -1.073742
## 6  0.08475973     -0.9201102          -0.4749541      -1.073742
##   HS_sex_antiretroviral HS_original_lifeexpectancy HS_original_genderequality
## 1          -0.007855567                 -0.1578527                 -0.1967259
## 2          -0.007855567                 -0.1578527                 -0.1967259
## 3          -0.007855567                 -0.1578527                 -0.1967259
## 4          -0.007855567                 -0.1578527                 -0.1967259
## 5          -0.007855567                 -0.1578527                 -0.1967259
## 6          -0.007855567                 -0.1578527                 -0.1967259
##   hardship_HS_index f_inv_acctownership_primaryedu f_oth_insfinsvcs_int
## 1        -0.1918856                      -0.495424            -1.339809
## 2        -0.1918856                      -0.495424            -1.339809
## 3        -0.1918856                      -0.495424            -1.339809
## 4        -0.1918856                      -0.495424            -1.339809
## 5        -0.1918856                      -0.495424            -1.339809
## 6        -0.1918856                      -0.495424            -1.339809
##   f_hs_oopexp10 f_eco_gdpdefl_linked f_eco_cpi f_original_gdp f_original_gini
## 1     0.5745653            0.5080757  0.753765     -0.6480058       0.5769059
## 2     0.5745653            0.5080757  0.753765     -0.6480058       0.5769059
## 3     0.5745653            0.5080757  0.753765     -0.6480058       0.5769059
## 4     0.5745653            0.5080757  0.753765     -0.6480058       0.5769059
## 5     0.5745653            0.5080757  0.753765     -0.6480058       0.5769059
## 6     0.5745653            0.5080757  0.753765     -0.6480058       0.5769059
##   hardship_Finance_index c_bh_homicide c_bh_childmalt c_bh_violextchildprot
## 1           -0.009989512     0.2305293      0.2262505            -0.5852603
## 2           -0.009989512     0.2305293      0.2262505            -0.5852603
## 3           -0.009989512     0.2305293      0.2262505            -0.5852603
## 4           -0.009989512     0.2305293      0.2262505            -0.5852603
## 5           -0.009989512     0.2305293      0.2262505            -0.5852603
## 6           -0.009989512     0.2305293      0.2262505            -0.5852603
##   c_bh_parviolenceprog c_bh_elderabuse c_theft_estcorruption c_oth_polstab
## 1             1.044673      -0.9626803          -0.005687925     0.7771958
## 2             1.044673      -0.9626803          -0.005687925     0.7771958
## 3             1.044673      -0.9626803          -0.005687925     0.7771958
## 4             1.044673      -0.9626803          -0.005687925     0.7771958
## 5             1.044673      -0.9626803          -0.005687925     0.7771958
## 6             1.044673      -0.9626803          -0.005687925     0.7771958
##   hardship_Crime_index e_oth_drinkingwater e_exp_watersanithyg100k e_ses_gini
## 1            0.1285553          -0.4208726               0.1506879 0.09919834
## 2            0.1285553          -0.4208726               0.1506879 0.09919834
## 3            0.1285553          -0.4208726               0.1506879 0.09919834
## 4            0.1285553          -0.4208726               0.1506879 0.09919834
## 5            0.1285553          -0.4208726               0.1506879 0.09919834
## 6            0.1285553          -0.4208726               0.1506879 0.09919834
##   e_ses_school e_exp_disaster e_exp_airdeath100k e_exp_watersanithyg
## 1    0.5982926      0.8649465         -0.1717911           0.0709064
## 2    0.5982926      0.8649465         -0.1717911           0.0709064
## 3    0.5982926      0.8649465         -0.1717911           0.0709064
## 4    0.5982926      0.8649465         -0.1717911           0.0709064
## 5    0.5982926      0.8649465         -0.1717911           0.0709064
## 6    0.5982926      0.8649465         -0.1717911           0.0709064
##   hardship_environment_index
## 1                  0.1627161
## 2                  0.1627161
## 3                  0.1627161
## 4                  0.1627161
## 5                  0.1627161
## 6                  0.1627161

Correlation Heatmap

library(reshape2)
library(ggplot2)

# Select the relevant columns: the outcome plus every individual hardship
# indicator.
selected_vars <- hardship_combined %>%
  select(
    "risktaking",
    "HS_alc_tax_wine", "HS_alc_roaddeath", "HS_drg_treatment",
    "HS_nic_affordability", "HS_mh_policy", "HS_sex_gini",
    "HS_oth_obesity", "HS_oth_cleancooking", "HS_mh_mhhospit",
    "HS_sex_antiretroviral", "HS_original_lifeexpectancy",
    "HS_original_genderequality",
    "f_inv_acctownership_primaryedu", "f_oth_insfinsvcs_int", "f_hs_oopexp10",
    "f_eco_gdpdefl_linked", "f_eco_cpi", "f_original_gdp", "f_original_gini",
    "c_bh_homicide", "c_bh_childmalt", "c_bh_violextchildprot",
    "c_bh_parviolenceprog", "c_bh_elderabuse", "c_theft_estcorruption",
    "c_oth_polstab",
    "e_oth_drinkingwater", "e_exp_watersanithyg100k", "e_ses_gini",
    "e_ses_school", "e_exp_disaster", "e_exp_airdeath100k",
    "e_exp_watersanithyg"
  )

# Correlation matrix of the selected variables; rows with any missing value
# are excluded (use = "complete.obs").
cor_matrix <- cor(selected_vars, use = "complete.obs")

# Reshape the matrix to long format (Var1, Var2, value) for ggplot.
melted_cor_matrix <- melt(cor_matrix)

# Heatmap with the correlation coefficient printed in every tile.
# `limits` is spelled out in full: the abbreviated `limit` only worked through
# partial argument matching.
correlation_heatmap <- ggplot(melted_cor_matrix, aes(Var1, Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = sprintf("%.2f", value)), color = "black", size = 2.5) +
  scale_fill_gradient2(
    midpoint = 0, low = "blue", high = "red", mid = "white",
    limits = c(-1, 1)
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    axis.text.y = element_text(angle = 45, hjust = 1, size = 8)
  ) +
  labs(fill = "Korrelation", x = NULL, y = NULL) +
  coord_fixed()

print(correlation_heatmap)

# Pass the plot explicitly so the saved file does not depend on whichever plot
# happened to be displayed last.
ggsave(
  "correlation_heatmap.png",
  plot = correlation_heatmap,
  width = 12, height = 10, dpi = 300
)

Age categories

# Define the age categories. Intervals are closed on the right and the lowest
# break is included, so the bins are [15, 25], (25, 45], (45, 65], (65, 99].
# Ages outside the breaks become NA.
hardship_combined$age_category <- cut(
  hardship_combined$age,
  breaks = c(15, 25, 45, 65, 99),
  labels = c(
    "Young Adults (15-25)",
    "Adults (26-45)",
    "Middle-aged Adults (46-65)",
    "Seniors (66-99)"
  ),
  right = TRUE,
  include.lowest = TRUE
)

# Integer code of each age category. The code is read directly from the
# factor so it always follows the level order defined above; re-wrapping the
# column in factor() would drop unused levels and renumber the remaining ones.
hardship_combined$age_numeric <- as.integer(hardship_combined$age_category)

# Check the number of observations per age category.
table(hardship_combined$age_category)
## 
##       Young Adults (15-25)             Adults (26-45) 
##                      46512                      94122 
## Middle-aged Adults (46-65)            Seniors (66-99) 
##                      64022                      23310

Variance of age categories

# Variance of age within each age category.
age_variance <- summarise(
  group_by(hardship_combined, age_category),
  Varianz = var(age, na.rm = TRUE)
)

print(age_variance)
## # A tibble: 4 × 2
##   age_category               Varianz
##   <fct>                        <dbl>
## 1 Young Adults (15-25)          7.54
## 2 Adults (26-45)               33.4 
## 3 Middle-aged Adults (46-65)   32.4 
## 4 Seniors (66-99)              33.5
# Standard deviation of age within each age category.
age_sd <- summarise(
  group_by(hardship_combined, age_category),
  Standardabweichung = sd(age, na.rm = TRUE)
)

print(age_sd)
## # A tibble: 4 × 2
##   age_category               Standardabweichung
##   <fct>                                   <dbl>
## 1 Young Adults (15-25)                     2.75
## 2 Adults (26-45)                           5.77
## 3 Middle-aged Adults (46-65)               5.69
## 4 Seniors (66-99)                          5.79
library(ggplot2)

# Boxplot of age by age category.
ggplot(data = hardship_combined, mapping = aes(x = age_category, y = age)) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "Altersverteilung nach Alterskategorie",
    x = "Age category",
    y = "Age"
  )

Aggregate data

# Aggregate to one row per country x gender x age category: group means of the
# outcome, of every hardship indicator and of scaled age, plus the group size.
# The means are written as lambdas because forwarding extra arguments through
# across(..., mean, na.rm = TRUE) is deprecated since dplyr 1.1.0.
hardship_combined_agg <- hardship_combined %>%
  group_by(COUNTRY, gender, age_numeric) %>%
  summarise(
    risktaking = mean(risktaking, na.rm = TRUE),
    across(starts_with("HS_"), ~ mean(.x, na.rm = TRUE)),
    across(starts_with("f_"), ~ mean(.x, na.rm = TRUE)),
    across(starts_with("e_"), ~ mean(.x, na.rm = TRUE)),
    across(starts_with("c_"), ~ mean(.x, na.rm = TRUE)),
    age_scale = mean(age_scale, na.rm = TRUE),
    n = n(), # number of respondents per group
    .groups = "drop"
  )

# Replace remaining missing values in numeric columns by the column mean.
# NOTE(review): `where(is.numeric)` also covers the grouping keys `gender` and
# `age_numeric` and the count `n` -- confirm that mean-imputing those columns
# is intended.
hardship_combined_agg <- hardship_combined_agg %>%
  mutate(across(where(is.numeric), ~ replace_na(.x, mean(.x, na.rm = TRUE))))

Table with Correlation hardship factors and risktaking

# Load the required libraries.
library(Hmisc)
library(kableExtra)

# Select all numeric variables.
numeric_vars <- hardship_combined %>%
  select(where(is.numeric))

# Correlation matrix and matching p-values.
cor_results <- rcorr(as.matrix(numeric_vars))

# Keep only the column that relates every variable to `risktaking`.
correlations <- cor_results$r[, "risktaking"]
p_values <- cor_results$P[, "risktaking"]

# Data frame used for display.
cor_table <- data.frame(
  Variable = rownames(cor_results$r),               # variable names
  Correlation = round(correlations, 5),             # rounded to 5 decimals
  P_value = format(p_values, scientific = TRUE),    # scientific notation
  Significant = ifelse(p_values < 0.05, "Yes", "No") # flag for p < 0.05
)

# Render with kable / kableExtra. The data frame carries the variable names as
# row names too, so row names are suppressed here; otherwise every name is
# shown twice. The column indices are shifted by one accordingly, so the same
# columns as before stay styled (Variable in bold, Correlation highlighted).
cor_table %>%
  kable(
    "html",
    caption = "Correlations with Risktaking: Summary of Results",
    row.names = FALSE
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  column_spec(1, bold = TRUE) %>%
  column_spec(2, background = "lightyellow")
Correlations with Risktaking: Summary of Results
Variable Correlation P_value Significant
gender gender -0.12013 0.000000e+00 Yes
age_scale age_scale -0.24165 0.000000e+00 Yes
age age -0.24153 0.000000e+00 Yes
risktaking risktaking 1.00000 NA NA
HS_alc_tax_wine HS_alc_tax_wine 0.08408 0.000000e+00 Yes
HS_alc_roaddeath HS_alc_roaddeath 0.07748 0.000000e+00 Yes
HS_drg_treatment HS_drg_treatment 0.07606 0.000000e+00 Yes
HS_nic_affordability HS_nic_affordability 0.07479 0.000000e+00 Yes
HS_mh_policy HS_mh_policy -0.00730 5.432256e-04 Yes
HS_sex_gini HS_sex_gini 0.08042 0.000000e+00 Yes
HS_oth_obesity HS_oth_obesity 0.01722 4.440892e-16 Yes
HS_oth_cleancooking HS_oth_cleancooking 0.10040 0.000000e+00 Yes
HS_mh_mhhospit HS_mh_mhhospit 0.03081 0.000000e+00 Yes
HS_sex_antiretroviral HS_sex_antiretroviral 0.05036 0.000000e+00 Yes
HS_original_lifeexpectancy HS_original_lifeexpectancy 0.13508 0.000000e+00 Yes
HS_original_genderequality HS_original_genderequality 0.05193 0.000000e+00 Yes
hardship_HS_index hardship_HS_index 0.11857 0.000000e+00 Yes
f_inv_acctownership_primaryedu f_inv_acctownership_primaryedu 0.02165 0.000000e+00 Yes
f_oth_insfinsvcs_int f_oth_insfinsvcs_int 0.05273 0.000000e+00 Yes
f_hs_oopexp10 f_hs_oopexp10 0.07968 0.000000e+00 Yes
f_eco_gdpdefl_linked f_eco_gdpdefl_linked 0.07069 0.000000e+00 Yes
f_eco_cpi f_eco_cpi 0.10108 0.000000e+00 Yes
f_original_gdp f_original_gdp 0.10264 0.000000e+00 Yes
f_original_gini f_original_gini 0.11561 0.000000e+00 Yes
hardship_Finance_index hardship_Finance_index 0.13907 0.000000e+00 Yes
c_bh_homicide c_bh_homicide 0.09742 0.000000e+00 Yes
c_bh_childmalt c_bh_childmalt 0.09575 0.000000e+00 Yes
c_bh_violextchildprot c_bh_violextchildprot 0.09740 0.000000e+00 Yes
c_bh_parviolenceprog c_bh_parviolenceprog 0.04102 0.000000e+00 Yes
c_bh_elderabuse c_bh_elderabuse 0.08784 0.000000e+00 Yes
c_theft_estcorruption c_theft_estcorruption 0.06759 0.000000e+00 Yes
c_oth_polstab c_oth_polstab 0.08484 0.000000e+00 Yes
hardship_Crime_index hardship_Crime_index 0.11775 0.000000e+00 Yes
e_oth_drinkingwater e_oth_drinkingwater 0.10202 0.000000e+00 Yes
e_exp_watersanithyg100k e_exp_watersanithyg100k 0.12385 0.000000e+00 Yes
e_ses_gini e_ses_gini 0.09615 0.000000e+00 Yes
e_ses_school e_ses_school 0.04925 0.000000e+00 Yes
e_exp_disaster e_exp_disaster -0.00702 8.810206e-04 Yes
e_exp_airdeath100k e_exp_airdeath100k 0.11396 0.000000e+00 Yes
e_exp_watersanithyg e_exp_watersanithyg 0.09110 0.000000e+00 Yes
hardship_environment_index hardship_environment_index 0.12948 0.000000e+00 Yes
age_numeric age_numeric -0.23007 0.000000e+00 Yes

Setup for specifications - all specifications

library(specr)

# Specification setup for the full sample: every hardship indicator is used as
# a predictor of `risktaking` in a linear model, with the two control
# variables offered as covariates.
# NOTE(review): the hardship indicators look constant within country in the
# printed data head; if so they are collinear with the COUNTRY control --
# confirm that specifications including COUNTRY are interpretable.
specification <- setup(
  data = hardship_combined,
  x = c(
    "HS_alc_tax_wine",
    "HS_alc_roaddeath",
    "HS_drg_treatment",
    "HS_nic_affordability",
    "HS_mh_policy",
    "HS_sex_gini",
    "HS_oth_obesity",
    "HS_oth_cleancooking",
    "HS_mh_mhhospit",
    "HS_sex_antiretroviral",
    "HS_original_lifeexpectancy",
    "HS_original_genderequality",
    "f_inv_acctownership_primaryedu",
    "f_oth_insfinsvcs_int",
    "f_hs_oopexp10",
    "f_eco_gdpdefl_linked",
    "f_eco_cpi",
    "f_original_gdp",
    "f_original_gini",
    "c_bh_homicide",
    "c_bh_childmalt",
    "c_bh_violextchildprot",
    "c_bh_parviolenceprog",
    "c_bh_elderabuse",
    "c_theft_estcorruption",
    "c_oth_polstab",
    "e_oth_drinkingwater",
    "e_exp_watersanithyg100k",
    "e_ses_gini",
    "e_ses_school",
    "e_exp_disaster",
    "e_exp_airdeath100k",
    "e_exp_watersanithyg"
  ),
  y = "risktaking", # dependent variable
  model = "lm",
  controls = c("age_scale", "COUNTRY")
)

# Overview of the specifications.
summary(specification)
## Setup for the Specification Curve Analysis
## -------------------------------------------
## Class:                      specr.setup -- version: 1.0.1 
## Number of specifications:   132 
## 
## Specifications:
## 
##   Independent variable:     HS_alc_tax_wine, HS_alc_roaddeath, HS_drg_treatment, HS_nic_affordability, HS_mh_policy, HS_sex_gini, HS_oth_obesity, HS_oth_cleancooking, HS_mh_mhhospit, HS_sex_antiretroviral, HS_original_lifeexpectancy, HS_original_genderequality, f_inv_acctownership_primaryedu, f_oth_insfinsvcs_int, f_hs_oopexp10, f_eco_gdpdefl_linked, f_eco_cpi, f_original_gdp, f_original_gini, c_bh_homicide, c_bh_childmalt, c_bh_violextchildprot, c_bh_parviolenceprog, c_bh_elderabuse, c_theft_estcorruption, c_oth_polstab, e_oth_drinkingwater, e_exp_watersanithyg100k, e_ses_gini, e_ses_school, e_exp_disaster, e_exp_airdeath100k, e_exp_watersanithyg 
##   Dependent variable:       risktaking 
##   Models:                   lm 
##   Covariates:               no covariates, age_scale, COUNTRY, age_scale + COUNTRY 
##   Subsets analyses:         all 
## 
## Function used to extract parameters:
## 
##   function (x) 
## broom::tidy(x, conf.int = TRUE)
## <environment: 0x130cf04e8>
## 
## 
## Head of specifications table (first 6 rows):
## # A tibble: 6 × 6
##   x                y          model controls            subsets formula         
##   <chr>            <chr>      <chr> <chr>               <chr>   <glue>          
## 1 HS_alc_tax_wine  risktaking lm    no covariates       all     risktaking ~ HS…
## 2 HS_alc_tax_wine  risktaking lm    age_scale           all     risktaking ~ HS…
## 3 HS_alc_tax_wine  risktaking lm    COUNTRY             all     risktaking ~ HS…
## 4 HS_alc_tax_wine  risktaking lm    age_scale + COUNTRY all     risktaking ~ HS…
## 5 HS_alc_roaddeath risktaking lm    no covariates       all     risktaking ~ HS…
## 6 HS_alc_roaddeath risktaking lm    age_scale           all     risktaking ~ HS…

run specifications

# Fit the models for all specifications defined in `specification`.
specification_results <- specr(specification)
# Print the fitted specification-curve object.
specification_results
## Models fitted based on 132 specifications
## Number of cores used: 1 
## 
## Descriptive summary of the specification curve:
## 
##  median  mad    min  max   q25  q75
##    0.26 1.03 -107.4 5.32 -1.64 0.78
# Summary of the fitted specification curve, reported with five digits.
summary(specification_results, digits = 5)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    2.964 sec elapsed 
##   Number of specifications:       132 
## 
## Descriptive summary of the specification curve:
## 
##   median   mad       min     max      q25     q75
##  0.25763 1.035 -107.4022 5.31884 -1.63972 0.78408
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  224583 221536 225551
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…    0.917    0.0229     40.0 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…    0.593    0.0225     26.4 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…   -0.903    0.172      -5.27
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…   -1.10     0.168      -6.57
## 5 HS_alc_road… risk… lm    no cova… all     riskta…    0.853    0.0232     36.8 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…    0.549    0.0227     24.2 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

summarizing the parameter distribution

# Summary statistics of the specification curve over all specifications.
summary(specification_results, type = "curve")
## # A tibble: 1 × 7
##   median   mad   min   max   q25   q75    obs
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1  0.258  1.03 -107.  5.32 -1.64 0.784 224583
# Curve summary per predictor (`x`); the requested statistics are given as a
# character vector.
summary(
  specification_results,
  type = "curve",
  group = "x",
  stats = c("median", "mean", "min", "max")
)
## # A tibble: 33 × 6
##    x                            median     mean      min    max    obs
##    <chr>                         <dbl>    <dbl>    <dbl>  <dbl>  <dbl>
##  1 HS_alc_roaddeath             0.745    0.723     0.549  0.853 224583
##  2 HS_alc_tax_wine             -0.155   -0.124    -1.10   0.917 224583
##  3 HS_drg_treatment             1.54     1.60      0.520  2.78  224583
##  4 HS_mh_mhhospit              -1.74    -1.84     -4.21   0.327 224583
##  5 HS_mh_policy                -2.98    -3.08     -6.20  -0.170 224583
##  6 HS_nic_affordability         1.56     1.58      0.403  2.82  224583
##  7 HS_original_genderequality   0.0504   0.130    -0.257  0.677 224583
##  8 HS_original_lifeexpectancy  -0.0705   0.0230   -1.16   1.40  224583
##  9 HS_oth_cleancooking         -2.56    -2.76     -7.02   1.09  224583
## 10 HS_oth_obesity             -43.9    -48.8    -107.     0.169 224583
## # ℹ 23 more rows

Plots

# Default overview plot of the specification curve analysis.
plot(specification_results)

# Panel a: specification curve with ribbon and enlarged points.
(a <- plot(specification_results, type = "curve", ci = FALSE, ribbon = TRUE) +
   geom_point(size = 4))

# Panel b: analytic choices behind each specification.
(b <- plot(specification_results, type = "choices", choices = c("x", "y", "model", "controls")) +
   geom_point(size = 2, shape = 4))

# Panel c: sample size per specification. No y-axis limit is set: the sample
# sizes are above 100,000, so the former `ylim(0, 400)` censored every bar and
# left the panel empty.
# NOTE(review): the name `c` shadows base::c as a variable; it is kept so that
# existing references to `a`, `b`, `c` keep working.
(c <- plot(specification_results, type = "samplesizes"))

# Stack the three panels vertically with aligned axes.
plot_grid(a, b, c, ncol = 1,
          align = "v",
          rel_heights = c(1.5, 2, 0.8),
          axis = "rbl")

# Distribution of the estimates per analytic choice.
plot(specification_results, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

Subsetting data for males

# Specification setup restricted to male respondents (gender == 1).
specification_males <- setup(
  data = filter(hardship_combined, gender == 1),
  x = c(
    "HS_alc_tax_wine",
    "HS_alc_roaddeath",
    "HS_drg_treatment",
    "HS_nic_affordability",
    "HS_mh_policy",
    "HS_sex_gini",
    "HS_oth_obesity",
    "HS_oth_cleancooking",
    "HS_mh_mhhospit",
    "HS_sex_antiretroviral",
    "HS_original_lifeexpectancy",
    "HS_original_genderequality",
    "f_inv_acctownership_primaryedu",
    "f_oth_insfinsvcs_int",
    "f_hs_oopexp10",
    "f_eco_gdpdefl_linked",
    "f_eco_cpi",
    "f_original_gdp",
    "f_original_gini",
    "c_bh_homicide",
    "c_bh_childmalt",
    "c_bh_violextchildprot",
    "c_bh_parviolenceprog",
    "c_bh_elderabuse",
    "c_theft_estcorruption",
    "c_oth_polstab",
    "e_oth_drinkingwater",
    "e_exp_watersanithyg100k",
    "e_ses_gini",
    "e_ses_school",
    "e_exp_disaster",
    "e_exp_airdeath100k",
    "e_exp_watersanithyg"
  ),
  y = "risktaking",
  model = "lm",
  controls = c("age_scale", "COUNTRY")
)

# Fit all specifications on the male subset.
specification_results_males <- specr(specification_males)

# Summarise the fitted specifications.
summary(specification_results_males)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    1.478 sec elapsed 
##   Number of specifications:       132 
## 
## Descriptive summary of the specification curve:
## 
##  median  mad    min  max   q25  q75
##    0.33 1.04 -83.77 4.15 -1.19 0.83
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  119096 117485 119591
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…     0.93      0.03     29.5 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…     0.58      0.03     18.8 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…    -0.51      0.24     -2.16
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…    -0.86      0.23     -3.69
## 5 HS_alc_road… risk… lm    no cova… all     riskta…     1         0.03     31.2 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…     0.69      0.03     22.2 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Plots for male subset results

# Default overview plot for the male subset.
plot(specification_results_males)

# Specification curve with ribbon and enlarged points.
(a_male <- plot(specification_results_males, type = "curve", ci = FALSE, ribbon = TRUE) +
   geom_point(size = 4))

# Analytic choices behind each specification.
(b_male <- plot(specification_results_males, type = "choices", choices = c("x", "y", "model", "controls")) +
   geom_point(size = 2, shape = 4))

# Sample size per specification. No y-axis limit is set: the sample sizes are
# above 100,000, so the former `ylim(0, 400)` censored every bar and left the
# panel empty.
(c_male <- plot(specification_results_males, type = "samplesizes"))

# Stack the three panels vertically with aligned axes.
plot_grid(a_male, b_male, c_male, ncol = 1,
          align = "v",
          rel_heights = c(1.5, 2, 0.8),
          axis = "rbl")

# Distribution of the estimates per analytic choice.
plot(specification_results_males, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

Subsetting data for females

# Specification setup restricted to female respondents (gender == 0).
specification_females <- setup(
  data = filter(hardship_combined, gender == 0),
  x = c(
    "HS_alc_tax_wine",
    "HS_alc_roaddeath",
    "HS_drg_treatment",
    "HS_nic_affordability",
    "HS_mh_policy",
    "HS_sex_gini",
    "HS_oth_obesity",
    "HS_oth_cleancooking",
    "HS_mh_mhhospit",
    "HS_sex_antiretroviral",
    "HS_original_lifeexpectancy",
    "HS_original_genderequality",
    "f_inv_acctownership_primaryedu",
    "f_oth_insfinsvcs_int",
    "f_hs_oopexp10",
    "f_eco_gdpdefl_linked",
    "f_eco_cpi",
    "f_original_gdp",
    "f_original_gini",
    "c_bh_homicide",
    "c_bh_childmalt",
    "c_bh_violextchildprot",
    "c_bh_parviolenceprog",
    "c_bh_elderabuse",
    "c_theft_estcorruption",
    "c_oth_polstab",
    "e_oth_drinkingwater",
    "e_exp_watersanithyg100k",
    "e_ses_gini",
    "e_ses_school",
    "e_exp_disaster",
    "e_exp_airdeath100k",
    "e_exp_watersanithyg"
  ),
  y = "risktaking",
  model = "lm",
  controls = c("age_scale", "COUNTRY")
)

# Fit all specifications on the female subset.
specification_results_females <- specr(specification_females)

# Summarise the fitted specifications.
summary(specification_results_females)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    1.358 sec elapsed 
##   Number of specifications:       132 
## 
## Descriptive summary of the specification curve:
## 
##  median  mad     min  max   q25  q75
##    0.18 1.17 -138.61 6.86 -2.12 0.65
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  105487 104051 105960
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…     0.84      0.03     25.4 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…     0.54      0.03     16.9 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…    -1.38      0.24     -5.67
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…    -1.42      0.24     -5.98
## 5 HS_alc_road… risk… lm    no cova… all     riskta…     0.69      0.03     20.9 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…     0.39      0.03     12.0 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Plots for female subset results

# Default overview plot for the female subset.
plot(specification_results_females)

# Specification curve with ribbon and enlarged points.
(a_female <- plot(specification_results_females, type = "curve", ci = FALSE, ribbon = TRUE) +
   geom_point(size = 4))

# Analytic choices behind each specification.
(b_female <- plot(specification_results_females, type = "choices", choices = c("x", "y", "model", "controls")) +
   geom_point(size = 2, shape = 4))

# Sample size per specification. No y-axis limit is set: the sample sizes are
# above 100,000, so the former `ylim(0, 400)` censored every bar and left the
# panel empty.
(c_female <- plot(specification_results_females, type = "samplesizes"))

# Stack the three panels vertically with aligned axes.
plot_grid(a_female, b_female, c_female, ncol = 1,
          align = "v",
          rel_heights = c(1.5, 2, 0.8),
          axis = "rbl")

# Distribution of the estimates per analytic choice.
plot(specification_results_females, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

Subsetting data for age-categories

#' Run the specification curve analysis for one age category
#'
#' Keeps the rows of `data` whose `age_numeric` equals `age_id`, fits every
#' specification (all hardship indicators as predictors of `risktaking`, with
#' `age_scale` and `COUNTRY` as control variables, linear models), prints the
#' summary under a heading and builds four plots.
#'
#' @param data Data frame containing `risktaking`, `age_numeric`, `age_scale`,
#'   `COUNTRY` and all hardship indicator columns.
#' @param age_id Value of `age_numeric` that selects the age category.
#' @param age_label Label used in the printed heading and in the plot titles.
#' @return A list with `summary` (result of `summary()` on the fitted
#'   specifications) and `plots` (list with `plot_a` curve, `plot_b` choices,
#'   `plot_c` sample sizes, `plot_d` boxplot).
run_specification_for_age <- function(data, age_id, age_label) {
  # Restrict the data to the requested age category.
  data_subset <- data %>%
    filter(age_numeric == age_id)

  # Set up the specifications.
  specification <- setup(
    data = data_subset,
    y = "risktaking",
    x = c("HS_alc_tax_wine", "HS_alc_roaddeath", "HS_drg_treatment", "HS_nic_affordability", "HS_mh_policy", "HS_sex_gini",
        "HS_oth_obesity", "HS_oth_cleancooking", "HS_mh_mhhospit", "HS_sex_antiretroviral", "HS_original_lifeexpectancy",
        "HS_original_genderequality",
        "f_inv_acctownership_primaryedu", "f_oth_insfinsvcs_int", "f_hs_oopexp10", "f_eco_gdpdefl_linked", "f_eco_cpi",
        "f_original_gdp", "f_original_gini",
        "c_bh_homicide", "c_bh_childmalt", "c_bh_violextchildprot", "c_bh_parviolenceprog", "c_bh_elderabuse",
        "c_theft_estcorruption", "c_oth_polstab",
        "e_oth_drinkingwater", "e_exp_watersanithyg100k", "e_ses_gini", "e_ses_school", "e_exp_disaster",  "e_exp_airdeath100k",
        "e_exp_watersanithyg"),
    controls = c("age_scale", "COUNTRY"),
    model = "lm"
  )

  # Fit all specifications.
  specification_results <- specr(specification)

  # Print the summary under a heading that names the age category. The summary
  # is computed once and reused for the return value instead of being
  # evaluated a second time.
  cat("\nStatistische Ergebnisse für die Alterskategorie:", age_label, "\n")
  results_summary <- summary(specification_results, digits = 5)
  print(results_summary)

  # Build the plots for this age category.
  plot_list <- list(
    plot_a = plot(specification_results, type = "curve", ci = FALSE, ribbon = TRUE) +
             geom_point(size = 4) + ggtitle(paste("Curve Plot -", age_label)),
    plot_b = plot(specification_results, type = "choices", choices = c("x", "y", "model", "controls")) +
             geom_point(size = 2, shape = 4) + ggtitle(paste("Choices Plot -", age_label)),
    # No y-axis limit: the sample sizes are in the tens of thousands, so the
    # former `ylim(0, 400)` censored every bar and left the panel empty.
    plot_c = plot(specification_results, type = "samplesizes") +
             ggtitle(paste("Sample Sizes Plot -", age_label)),
    plot_d = plot(specification_results, type = "boxplot") +
             geom_point(alpha = .4) + scale_fill_brewer(palette = "Pastel2") +
             labs(x = "Effect size", fill = "") + ggtitle(paste("Boxplot -", age_label))
  )

  list(summary = results_summary, plots = plot_list)
}

# Run the specification curve analysis for each of the four age groups and
# display the resulting plots.
for (i in seq_len(4)) {
  results <- run_specification_for_age(
    hardship_combined, i, paste("Age Group", i)
  )
  # run_specification_for_age() prints the summary itself. In the rendered
  # output results$summary was NULL, so printing it unconditionally only added
  # a stray "NULL" after every age group; print it only if it holds something.
  if (!is.null(results$summary)) {
    print(results$summary)
  }
  # Show the stored plots (plot_a ... plot_d) in the order they were built.
  for (age_plot in results$plots) {
    print(age_plot)
  }
}
## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 1 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    33.124 sec elapsed 
##   Number of specifications:       132 
## 
## Descriptive summary of the specification curve:
## 
##    median     mad       min     max      q25     q75
##  -0.32343 1.20095 -145.7416 7.21629 -2.28239 0.07565
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   45989 45836 46135
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…  0.359      0.0497    7.21  
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…  0.369      0.0497    7.42  
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta… -1.50       0.264    -5.68  
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta… -1.50       0.263    -5.67  
## 5 HS_alc_road… risk… lm    no cova… all     riskta…  0.00901    0.0444    0.203 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…  0.00096    0.0444    0.0215
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    33.124 sec elapsed 
##   Number of specifications:       132 
## 
## Descriptive summary of the specification curve:
## 
##    median     mad       min     max      q25     q75
##  -0.32343 1.20095 -145.7416 7.21629 -2.28239 0.07565
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   45989 45836 46135
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…  0.359      0.0497    7.21  
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…  0.369      0.0497    7.42  
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta… -1.50       0.264    -5.68  
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta… -1.50       0.263    -5.67  
## 5 HS_alc_road… risk… lm    no cova… all     riskta…  0.00901    0.0444    0.203 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…  0.00096    0.0444    0.0215
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 2 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    1.149 sec elapsed 
##   Number of specifications:       132 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad      min     max      q25     q75
##  0.24468 0.92786 -100.426 4.97252 -1.60268 0.74144
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   92746 91915 93221
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…    0.586    0.0352     16.6 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…    0.560    0.0351     16.0 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…   -1.03     0.251      -4.11
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…   -0.988    0.251      -3.94
## 5 HS_alc_road… risk… lm    no cova… all     riskta…    0.735    0.0340     21.6 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…    0.663    0.0339     19.5 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    1.149 sec elapsed 
##   Number of specifications:       132 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad      min     max      q25     q75
##  0.24468 0.92786 -100.426 4.97252 -1.60268 0.74144
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   92746 91915 93221
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…    0.586    0.0352     16.6 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…    0.560    0.0351     16.0 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…   -1.03     0.251      -4.11
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…   -0.988    0.251      -3.94
## 5 HS_alc_road… risk… lm    no cova… all     riskta…    0.735    0.0340     21.6 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…    0.663    0.0339     19.5 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 3 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    48.388 sec elapsed 
##   Number of specifications:       132 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad     min      max     q25     q75
##  0.36021 0.49885 -9.0884 13.84448 0.09418 0.76967
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   62961 61551 63153
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…   0.752     0.0430    17.5  
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…   0.713     0.0430    16.6  
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…   0.142     0.522      0.272
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…   0.0533    0.521      0.102
## 5 HS_alc_road… risk… lm    no cova… all     riskta…   0.890     0.0484    18.4  
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…   0.854     0.0483    17.7  
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    48.388 sec elapsed 
##   Number of specifications:       132 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad     min      max     q25     q75
##  0.36021 0.49885 -9.0884 13.84448 0.09418 0.76967
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   62961 61551 63153
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…   0.752     0.0430    17.5  
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…   0.713     0.0430    16.6  
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…   0.142     0.522      0.272
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…   0.0533    0.521      0.102
## 5 HS_alc_road… risk… lm    no cova… all     riskta…   0.890     0.0484    18.4  
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…   0.854     0.0483    17.7  
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 4 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    16.967 sec elapsed 
##   Number of specifications:       132 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad       min     max     q25     q75
##  0.98507 1.36967 -12.68596 256.176 0.42226 3.59018
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   23007 22234 23042
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…    0.912    0.0674     13.5 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…    0.896    0.0674     13.3 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…    2.56     1.42        1.81
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…    2.63     1.42        1.86
## 5 HS_alc_road… risk… lm    no cova… all     riskta…    0.948    0.0879     10.8 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…    0.942    0.0878     10.7 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    16.967 sec elapsed 
##   Number of specifications:       132 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad       min     max     q25     q75
##  0.98507 1.36967 -12.68596 256.176 0.42226 3.59018
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   23007 22234 23042
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…    0.912    0.0674     13.5 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…    0.896    0.0674     13.3 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…    2.56     1.42        1.81
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…    2.63     1.42        1.86
## 5 HS_alc_road… risk… lm    no cova… all     riskta…    0.948    0.0879     10.8 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…    0.942    0.0878     10.7 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

Hardship Health/Safety

Setup for specifications

library(specr)

# Specification setup for the health/safety hardship indicators: each
# indicator in `x` is used as predictor of `risktaking` in an `lm`, combined
# with every covariate set that specr derives from `controls`.
# NOTE(review): the HS_* indicators are merged in by COUNTRY (left_join chain
# at the top of the file), so they are presumably constant within a country.
# If so, they are collinear with the COUNTRY dummies and the COUNTRY-adjusted
# estimates may not be interpretable - confirm before reporting them.
specification <- setup(
  data = hardship_combined,
  x = c(
    "HS_alc_tax_wine",
    "HS_alc_roaddeath",
    "HS_drg_treatment",
    "HS_nic_affordability",
    "HS_mh_policy",
    "HS_sex_gini",
    "HS_oth_obesity",
    "HS_oth_cleancooking",
    "HS_mh_mhhospit",
    "HS_sex_antiretroviral",
    "HS_original_lifeexpectancy",
    "HS_original_genderequality"
  ),
  y = "risktaking", # dependent variable
  model = "lm",
  controls = c("age_scale", "COUNTRY")
)

# Print an overview of the specifications that will be fitted.
summary(specification)
## Setup for the Specification Curve Analysis
## -------------------------------------------
## Class:                      specr.setup -- version: 1.0.1 
## Number of specifications:   48 
## 
## Specifications:
## 
##   Independent variable:     HS_alc_tax_wine, HS_alc_roaddeath, HS_drg_treatment, HS_nic_affordability, HS_mh_policy, HS_sex_gini, HS_oth_obesity, HS_oth_cleancooking, HS_mh_mhhospit, HS_sex_antiretroviral, HS_original_lifeexpectancy, HS_original_genderequality 
##   Dependent variable:       risktaking 
##   Models:                   lm 
##   Covariates:               no covariates, age_scale, COUNTRY, age_scale + COUNTRY 
##   Subsets analyses:         all 
## 
## Function used to extract parameters:
## 
##   function (x) 
## broom::tidy(x, conf.int = TRUE)
## <environment: 0x13554d830>
## 
## 
## Head of specifications table (first 6 rows):
## # A tibble: 6 × 6
##   x                y          model controls            subsets formula         
##   <chr>            <chr>      <chr> <chr>               <chr>   <glue>          
## 1 HS_alc_tax_wine  risktaking lm    no covariates       all     risktaking ~ HS…
## 2 HS_alc_tax_wine  risktaking lm    age_scale           all     risktaking ~ HS…
## 3 HS_alc_tax_wine  risktaking lm    COUNTRY             all     risktaking ~ HS…
## 4 HS_alc_tax_wine  risktaking lm    age_scale + COUNTRY all     risktaking ~ HS…
## 5 HS_alc_roaddeath risktaking lm    no covariates       all     risktaking ~ HS…
## 6 HS_alc_roaddeath risktaking lm    age_scale           all     risktaking ~ HS…

Run specifications

# Fit every specification and print the resulting specr object; the
# surrounding parentheses make the assignment print its value.
(specification_results <- specr(specification))
## Models fitted based on 48 specifications
## Number of cores used: 1 
## 
## Descriptive summary of the specification curve:
## 
##  median  mad    min  max   q25  q75
##    0.23 0.92 -107.4 2.82 -0.99 0.79
# Print the full summary of the fitted specifications (digits = 5).
summary(specification_results, digits = 5)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    1.044 sec elapsed 
##   Number of specifications:       48 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad       min     max      q25    q75
##  0.22763 0.91529 -107.4022 2.82195 -0.99022 0.7892
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  224583 224583 224583
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…    0.917    0.0229     40.0 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…    0.593    0.0225     26.4 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…   -0.903    0.172      -5.27
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…   -1.10     0.168      -6.57
## 5 HS_alc_road… risk… lm    no cova… all     riskta…    0.853    0.0232     36.8 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…    0.549    0.0227     24.2 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Summarizing the parameter distribution

# Descriptive statistics of the estimates across all specifications.
summary(specification_results, type = "curve")
## # A tibble: 1 × 7
##   median   mad   min   max    q25   q75    obs
##    <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>
## 1  0.228 0.915 -107.  2.82 -0.990 0.789 224583
# Curve statistics per independent variable (x); the requested statistics are
# passed as one character vector.
summary(
  specification_results,
  type = "curve",
  group = "x",
  stats = c("median", "mean", "min", "max")
)
## # A tibble: 12 × 6
##    x                            median     mean      min    max    obs
##    <chr>                         <dbl>    <dbl>    <dbl>  <dbl>  <dbl>
##  1 HS_alc_roaddeath             0.745    0.723     0.549  0.853 224583
##  2 HS_alc_tax_wine             -0.155   -0.124    -1.10   0.917 224583
##  3 HS_drg_treatment             1.54     1.60      0.520  2.78  224583
##  4 HS_mh_mhhospit              -1.74    -1.84     -4.21   0.327 224583
##  5 HS_mh_policy                -2.98    -3.08     -6.20  -0.170 224583
##  6 HS_nic_affordability         1.56     1.58      0.403  2.82  224583
##  7 HS_original_genderequality   0.0504   0.130    -0.257  0.677 224583
##  8 HS_original_lifeexpectancy  -0.0705   0.0230   -1.16   1.40  224583
##  9 HS_oth_cleancooking         -2.56    -2.76     -7.02   1.09  224583
## 10 HS_oth_obesity             -43.9    -48.8    -107.     0.169 224583
## 11 HS_sex_antiretroviral       -0.231   -0.167    -0.826  0.618 224583
## 12 HS_sex_gini                 -0.918   -0.885    -2.54   0.837 224583

Plots

# Default specr plot of the fitted specifications.
plot(specification_results)

# Specification curve with ribbon instead of per-estimate confidence bars.
(a <- plot(
  specification_results,
  type = "curve", ci = FALSE, ribbon = TRUE
) +
  geom_point(size = 4))

# Panel showing which analytic choices belong to each specification.
(b <- plot(
  specification_results,
  type = "choices",
  choices = c("x", "y", "model", "controls")
) +
  geom_point(size = 2, shape = 4))

# Sample size per specification. The previous ylim(0, 400) was far below the
# observed sample sizes (about 224,583 per model), so every bar fell outside
# the scale and was dropped; the default y scale is used instead.
# NOTE(review): `c` shadows base::c as a variable name; calls to c() still
# resolve to the function, but consider renaming.
(c <- plot(specification_results, type = "samplesizes"))

# Stack curve, choices and sample sizes in one vertically aligned figure.
plot_grid(
  a, b, c,
  ncol = 1,
  align = "v",
  rel_heights = c(1.5, 2.5, 0.8),
  axis = "rbl"
)

# Box plot of the effect sizes with the individual estimates overlaid.
plot(specification_results, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

# Lade dplyr für die Datenmanipulation
library(dplyr)

# Extract the per-specification results from the specr object.
results_df <- specification_results$data

# Summarise the estimates per indicator (x) and covariate set (controls).
# NOTE(review): the rendered table has 48 rows for 48 specifications, so each
# x/controls cell seems to hold a single estimate and Median/Min/Max/Q25/Q75
# coincide - confirm that this grouping is the intended one.
summary_table <- results_df %>%
  group_by(x, controls) %>%
  summarise(
    Median = round(median(estimate, na.rm = TRUE), 2),
    Min = round(min(estimate, na.rm = TRUE), 2),
    Max = round(max(estimate, na.rm = TRUE), 2),
    Q25 = round(quantile(estimate, 0.25, na.rm = TRUE), 2),
    Q75 = round(quantile(estimate, 0.75, na.rm = TRUE), 2),
    # Share (in percent) of estimates with p < 0.05.
    Significant_Perc = round(mean(p.value < 0.05, na.rm = TRUE) * 100, 2),
    # Return an ungrouped table; without this the result stayed grouped by x.
    .groups = "drop"
  ) %>%
  arrange(desc(Significant_Perc))

# Show the table.
print(summary_table)
## # A tibble: 48 × 8
## # Groups:   x [12]
##    x                controls     Median   Min   Max   Q25   Q75 Significant_Perc
##    <chr>            <chr>         <dbl> <dbl> <dbl> <dbl> <dbl>            <dbl>
##  1 HS_alc_roaddeath COUNTRY        0.67  0.67  0.67  0.67  0.67              100
##  2 HS_alc_roaddeath age_scale      0.55  0.55  0.55  0.55  0.55              100
##  3 HS_alc_roaddeath age_scale +…   0.82  0.82  0.82  0.82  0.82              100
##  4 HS_alc_roaddeath no covariat…   0.85  0.85  0.85  0.85  0.85              100
##  5 HS_alc_tax_wine  COUNTRY       -0.9  -0.9  -0.9  -0.9  -0.9               100
##  6 HS_alc_tax_wine  age_scale      0.59  0.59  0.59  0.59  0.59              100
##  7 HS_alc_tax_wine  age_scale +…  -1.1  -1.1  -1.1  -1.1  -1.1               100
##  8 HS_alc_tax_wine  no covariat…   0.92  0.92  0.92  0.92  0.92              100
##  9 HS_drg_treatment COUNTRY        2.3   2.3   2.3   2.3   2.3               100
## 10 HS_drg_treatment age_scale      0.52  0.52  0.52  0.52  0.52              100
## # ℹ 38 more rows

Subsetting data for males

# Health/safety specifications restricted to rows with gender == 1 (labelled
# as males in this script).
# NOTE(review): the HS_* indicators are joined by COUNTRY and are presumably
# constant within a country, which would make them collinear with the COUNTRY
# covariate - confirm.
specification_males <- hardship_combined %>%
  filter(gender == 1) %>%
  setup(
    data = .,
    x = c(
      "HS_alc_tax_wine",
      "HS_alc_roaddeath",
      "HS_drg_treatment",
      "HS_nic_affordability",
      "HS_mh_policy",
      "HS_sex_gini",
      "HS_oth_obesity",
      "HS_oth_cleancooking",
      "HS_mh_mhhospit",
      "HS_sex_antiretroviral",
      "HS_original_lifeexpectancy",
      "HS_original_genderequality"
    ),
    y = "risktaking",
    model = "lm",
    controls = c("age_scale", "COUNTRY")
  )

# Fit all specifications on the male subset.
specification_results_males <- specr(specification_males)

# Print the summary of the fitted specifications.
summary(specification_results_males)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    32.305 sec elapsed 
##   Number of specifications:       48 
## 
## Descriptive summary of the specification curve:
## 
##  median  mad    min  max  q25  q75
##     0.3 0.97 -83.77 3.31 -0.7 0.81
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  119096 119096 119096
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…     0.93      0.03     29.5 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…     0.58      0.03     18.8 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…    -0.51      0.24     -2.16
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…    -0.86      0.23     -3.69
## 5 HS_alc_road… risk… lm    no cova… all     riskta…     1         0.03     31.2 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…     0.69      0.03     22.2 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Plots for male subset results

# Default specr plot of the male-subset results.
plot(specification_results_males)

# Specification curve with ribbon instead of per-estimate confidence bars.
(a_male <- plot(
  specification_results_males,
  type = "curve", ci = FALSE, ribbon = TRUE
) +
  geom_point(size = 4))

# Panel showing which analytic choices belong to each specification.
(b_male <- plot(
  specification_results_males,
  type = "choices",
  choices = c("x", "y", "model", "controls")
) +
  geom_point(size = 2, shape = 4))

# Sample size per specification. The previous ylim(0, 400) was far below the
# observed sample size (119,096 per model), so every bar fell outside the
# scale and was dropped; the default y scale is used instead.
(c_male <- plot(specification_results_males, type = "samplesizes"))

# Stack curve, choices and sample sizes in one vertically aligned figure.
plot_grid(
  a_male, b_male, c_male,
  ncol = 1,
  align = "v",
  rel_heights = c(1.5, 2, 0.8),
  axis = "rbl"
)

# Box plot of the effect sizes with the individual estimates overlaid.
plot(specification_results_males, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

# Lade die notwendigen Pakete
library(dplyr)
library(flextable)
library(officer)

# Extract the per-specification results for the male subset.
results_df <- specification_results_males$data

# Summarise the estimates per indicator (x) and covariate set (controls).
summary_table <- results_df %>%
  group_by(x, controls) %>%
  summarise(
    Median = round(median(estimate, na.rm = TRUE), 5),
    Min = round(min(estimate, na.rm = TRUE), 5),
    Max = round(max(estimate, na.rm = TRUE), 5),
    Q25 = round(quantile(estimate, 0.25, na.rm = TRUE), 5),
    Q75 = round(quantile(estimate, 0.75, na.rm = TRUE), 5),
    # Share (in percent) of estimates with p < 0.05.
    Significant_Perc = round(mean(p.value < 0.05, na.rm = TRUE) * 100, 5),
    # Return an ungrouped table instead of one still grouped by x.
    .groups = "drop"
  ) %>%
  arrange(desc(Significant_Perc)) # most frequently significant first

# Build a formatted flextable from the summary.
summary_flextable <- flextable(summary_table) %>%
  theme_vanilla() %>%
  set_table_properties(width = 1, layout = "autofit") %>%
  align(align = "center", part = "all") %>%
  bold(part = "header") %>%
  autofit()

# Create a new Word document, add the table and write the file; the target
# path is relative, so it is resolved against the current working directory.
doc <- read_docx()
doc <- body_add_flextable(doc, summary_flextable)
print(doc, target = "Specification_Summary_HS_male.docx")

Subsetting data for females

# Health/safety specifications restricted to rows with gender == 0 (labelled
# as females in this script).
# NOTE(review): the HS_* indicators are joined by COUNTRY and are presumably
# constant within a country, which would make them collinear with the COUNTRY
# covariate - confirm.
specification_females <- hardship_combined %>%
  filter(gender == 0) %>%
  setup(
    data = .,
    x = c(
      "HS_alc_tax_wine",
      "HS_alc_roaddeath",
      "HS_drg_treatment",
      "HS_nic_affordability",
      "HS_mh_policy",
      "HS_sex_gini",
      "HS_oth_obesity",
      "HS_oth_cleancooking",
      "HS_mh_mhhospit",
      "HS_sex_antiretroviral",
      "HS_original_lifeexpectancy",
      "HS_original_genderequality"
    ),
    y = "risktaking",
    model = "lm",
    controls = c("age_scale", "COUNTRY")
  )

# Fit all specifications on the female subset.
specification_results_females <- specr(specification_females)

# Print the summary of the fitted specifications.
summary(specification_results_females)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    31.21 sec elapsed 
##   Number of specifications:       48 
## 
## Descriptive summary of the specification curve:
## 
##  median  mad     min  max   q25  q75
##    0.13 1.04 -138.61 3.64 -1.43 0.62
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  105487 105487 105487
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…     0.84      0.03     25.4 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…     0.54      0.03     16.9 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…    -1.38      0.24     -5.67
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…    -1.42      0.24     -5.98
## 5 HS_alc_road… risk… lm    no cova… all     riskta…     0.69      0.03     20.9 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…     0.39      0.03     12.0 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Plots for female subset results

# Default specr plot of the female-subset results.
plot(specification_results_females)

# Specification curve with ribbon instead of per-estimate confidence bars.
(a_female <- plot(
  specification_results_females,
  type = "curve", ci = FALSE, ribbon = TRUE
) +
  geom_point(size = 4))

# Panel showing which analytic choices belong to each specification.
(b_female <- plot(
  specification_results_females,
  type = "choices",
  choices = c("x", "y", "model", "controls")
) +
  geom_point(size = 2, shape = 4))

# Sample size per specification. The previous ylim(0, 400) was far below the
# observed sample size (105,487 per model), so every bar fell outside the
# scale and was dropped; the default y scale is used instead.
(c_female <- plot(specification_results_females, type = "samplesizes"))

# Stack curve, choices and sample sizes in one vertically aligned figure.
plot_grid(
  a_female, b_female, c_female,
  ncol = 1,
  align = "v",
  rel_heights = c(1.5, 2, 0.8),
  axis = "rbl"
)

# Box plot of the effect sizes with the individual estimates overlaid.
plot(specification_results_females, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

# Lade die notwendigen Pakete
library(dplyr)
library(flextable)
library(officer)

# Extract the per-specification results for the female subset.
results_df <- specification_results_females$data

# Summarise the estimates per indicator (x) and covariate set (controls).
summary_table <- results_df %>%
  group_by(x, controls) %>%
  summarise(
    Median = round(median(estimate, na.rm = TRUE), 5),
    Min = round(min(estimate, na.rm = TRUE), 5),
    Max = round(max(estimate, na.rm = TRUE), 5),
    Q25 = round(quantile(estimate, 0.25, na.rm = TRUE), 5),
    Q75 = round(quantile(estimate, 0.75, na.rm = TRUE), 5),
    # Share (in percent) of estimates with p < 0.05.
    Significant_Perc = round(mean(p.value < 0.05, na.rm = TRUE) * 100, 5),
    # Return an ungrouped table instead of one still grouped by x.
    .groups = "drop"
  ) %>%
  arrange(desc(Significant_Perc)) # most frequently significant first

# Build a formatted flextable from the summary.
summary_flextable <- flextable(summary_table) %>%
  theme_vanilla() %>%
  set_table_properties(width = 1, layout = "autofit") %>%
  align(align = "center", part = "all") %>%
  bold(part = "header") %>%
  autofit()

# Create a new Word document, add the table and write the file; the target
# path is relative, so it is resolved against the current working directory.
doc <- read_docx()
doc <- body_add_flextable(doc, summary_flextable)
print(doc, target = "Specification_Summary_HS_female.docx")

Subsetting data by age category

# Run the health/safety specification curve analysis for one age group.
#
# Keeps the rows of `data` whose `age_numeric` equals `age_id`, fits one `lm`
# per health/safety indicator and covariate set via specr, prints the summary
# under a heading that contains `age_label`, and builds four plots.
#
# Arguments:
#   data      Data frame with `age_numeric`, `risktaking`, `age_scale`,
#             `COUNTRY` and the HS_* indicator columns.
#   age_id    Value of `age_numeric` that selects the age group.
#   age_label Label used in the printed heading and in the plot titles.
#
# Returns a list with
#   summary   the value returned by summary() on the specr results (NULL in
#             the rendered output, since summary() prints the report itself)
#   plots     named list plot_a (curve), plot_b (choices), plot_c (sample
#             sizes) and plot_d (box plot).
#
# NOTE(review): the HS_* indicators are joined by COUNTRY and are presumably
# constant within a country, which would make them collinear with the COUNTRY
# covariate - confirm.
run_specification_for_age <- function(data, age_id, age_label) {
  # Restrict the data to the requested age group.
  data_subset <- data %>%
    filter(age_numeric == age_id)

  # Define the specifications for this subset.
  specification <- setup(
    data = data_subset,
    y = "risktaking",
    x = c(
      "HS_alc_tax_wine", "HS_alc_roaddeath",
      "HS_drg_treatment", "HS_nic_affordability", "HS_mh_policy",
      "HS_sex_gini", "HS_oth_obesity", "HS_oth_cleancooking",
      "HS_mh_mhhospit", "HS_sex_antiretroviral",
      "HS_original_lifeexpectancy", "HS_original_genderequality"
    ),
    controls = c("age_scale", "COUNTRY"),
    model = "lm"
  )

  # Fit all specifications.
  specification_results <- specr(specification)

  # Print the heading, then the summary. summary() prints the report itself,
  # so it is called exactly once and its return value is kept; wrapping it in
  # print() and calling it again for the return list printed the report twice
  # plus a stray "NULL".
  cat("\nStatistische Ergebnisse für die Alterskategorie:", age_label, "\n")
  results_summary <- summary(specification_results, digits = 5)

  # Build the plots for this age group.
  plot_list <- list(
    plot_a = plot(
      specification_results,
      type = "curve", ci = FALSE, ribbon = TRUE
    ) +
      geom_point(size = 4) +
      ggtitle(paste("Curve Plot -", age_label)),
    plot_b = plot(
      specification_results,
      type = "choices",
      choices = c("x", "y", "model", "controls")
    ) +
      geom_point(size = 2, shape = 4) +
      ggtitle(paste("Choices Plot -", age_label)),
    # No ylim(0, 400) here: the age-group samples contain tens of thousands
    # of observations, so that limit removed every bar from the plot.
    plot_c = plot(specification_results, type = "samplesizes") +
      ggtitle(paste("Sample Sizes Plot -", age_label)),
    plot_d = plot(specification_results, type = "boxplot") +
      geom_point(alpha = .4) +
      scale_fill_brewer(palette = "Pastel2") +
      labs(x = "Effect size", fill = "") +
      ggtitle(paste("Boxplot -", age_label))
  )

  list(summary = results_summary, plots = plot_list)
}

# Run the analysis for each of the four age groups and show the results
for (i in seq_len(4)) {
  results <- run_specification_for_age(hardship_combined, i, paste("Age Group", i))

  # The function already prints the detailed report itself. Only print the
  # returned summary when there is one, so no stray "NULL" is emitted.
  if (!is.null(results$summary)) {
    print(results$summary)
  }

  # Print every plot in the order it was created (plot_a ... plot_d)
  for (plot_name in names(results$plots)) {
    print(results$plots[[plot_name]])
  }
}
## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 1 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    12.383 sec elapsed 
##   Number of specifications:       48 
## 
## Descriptive summary of the specification curve:
## 
##    median     mad       min    max      q25     q75
##  -0.35705 1.10239 -145.7416 3.8293 -1.51768 0.01606
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   45869 45869 45869
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…  0.359      0.0497    7.21  
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…  0.369      0.0497    7.42  
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta… -1.50       0.264    -5.68  
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta… -1.50       0.263    -5.67  
## 5 HS_alc_road… risk… lm    no cova… all     riskta…  0.00901    0.0444    0.203 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…  0.00096    0.0444    0.0215
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    12.383 sec elapsed 
##   Number of specifications:       48 
## 
## Descriptive summary of the specification curve:
## 
##    median     mad       min    max      q25     q75
##  -0.35705 1.10239 -145.7416 3.8293 -1.51768 0.01606
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   45869 45869 45869
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…  0.359      0.0497    7.21  
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…  0.369      0.0497    7.42  
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta… -1.50       0.264    -5.68  
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta… -1.50       0.263    -5.67  
## 5 HS_alc_road… risk… lm    no cova… all     riskta…  0.00901    0.0444    0.203 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…  0.00096    0.0444    0.0215
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 2 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    25.496 sec elapsed 
##   Number of specifications:       48 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad      min     max      q25     q75
##  0.13771 0.91069 -100.426 2.79922 -1.03474 0.61982
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   92746 92746 92746
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…    0.586    0.0352     16.6 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…    0.560    0.0351     16.0 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…   -1.03     0.251      -4.11
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…   -0.988    0.251      -3.94
## 5 HS_alc_road… risk… lm    no cova… all     riskta…    0.735    0.0340     21.6 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…    0.663    0.0339     19.5 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    25.496 sec elapsed 
##   Number of specifications:       48 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad      min     max      q25     q75
##  0.13771 0.91069 -100.426 2.79922 -1.03474 0.61982
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   92746 92746 92746
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…    0.586    0.0352     16.6 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…    0.560    0.0351     16.0 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…   -1.03     0.251      -4.11
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…   -0.988    0.251      -3.94
## 5 HS_alc_road… risk… lm    no cova… all     riskta…    0.735    0.0340     21.6 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…    0.663    0.0339     19.5 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 3 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    17.704 sec elapsed 
##   Number of specifications:       48 
## 
## Descriptive summary of the specification curve:
## 
##  median    mad     min      max     q25     q75
##  0.4031 0.4784 -9.0884 13.84448 0.09539 0.73856
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   62961 62961 62961
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…   0.752     0.0430    17.5  
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…   0.713     0.0430    16.6  
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…   0.142     0.522      0.272
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…   0.0533    0.521      0.102
## 5 HS_alc_road… risk… lm    no cova… all     riskta…   0.890     0.0484    18.4  
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…   0.854     0.0483    17.7  
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    17.704 sec elapsed 
##   Number of specifications:       48 
## 
## Descriptive summary of the specification curve:
## 
##  median    mad     min      max     q25     q75
##  0.4031 0.4784 -9.0884 13.84448 0.09539 0.73856
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   62961 62961 62961
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…   0.752     0.0430    17.5  
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…   0.713     0.0430    16.6  
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…   0.142     0.522      0.272
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…   0.0533    0.521      0.102
## 5 HS_alc_road… risk… lm    no cova… all     riskta…   0.890     0.0484    18.4  
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…   0.854     0.0483    17.7  
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 4 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    5.944 sec elapsed 
##   Number of specifications:       48 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad      min     max     q25     q75
##  0.94984 1.25041 -9.86834 256.176 0.55435 2.11772
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   23007 23007 23007
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…    0.912    0.0674     13.5 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…    0.896    0.0674     13.3 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…    2.56     1.42        1.81
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…    2.63     1.42        1.86
## 5 HS_alc_road… risk… lm    no cova… all     riskta…    0.948    0.0879     10.8 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…    0.942    0.0878     10.7 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    5.944 sec elapsed 
##   Number of specifications:       48 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad      min     max     q25     q75
##  0.94984 1.25041 -9.86834 256.176 0.55435 2.11772
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   23007 23007 23007
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 HS_alc_tax_… risk… lm    no cova… all     riskta…    0.912    0.0674     13.5 
## 2 HS_alc_tax_… risk… lm    age_sca… all     riskta…    0.896    0.0674     13.3 
## 3 HS_alc_tax_… risk… lm    COUNTRY  all     riskta…    2.56     1.42        1.81
## 4 HS_alc_tax_… risk… lm    age_sca… all     riskta…    2.63     1.42        1.86
## 5 HS_alc_road… risk… lm    no cova… all     riskta…    0.948    0.0879     10.8 
## 6 HS_alc_road… risk… lm    age_sca… all     riskta…    0.942    0.0878     10.7 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

Hardship Finance

Setup for specifications

library(specr)

# Specification grid for the finance hardship indicators: each predictor is
# combined with each covariate set and fitted with lm().
specification <- setup(
  data = hardship_combined,
  x = c(
    "f_inv_acctownership_primaryedu",
    "f_oth_insfinsvcs_int",
    "f_hs_oopexp10",
    "f_eco_gdpdefl_linked",
    "f_eco_cpi",
    "f_original_gdp",
    "f_original_gini"
  ),
  y = "risktaking",  # dependent variable
  model = "lm",
  controls = c("age_scale", "COUNTRY")
)

# Overview of the generated specifications
summary(specification)
## Setup for the Specification Curve Analysis
## -------------------------------------------
## Class:                      specr.setup -- version: 1.0.1 
## Number of specifications:   28 
## 
## Specifications:
## 
##   Independent variable:     f_inv_acctownership_primaryedu, f_oth_insfinsvcs_int, f_hs_oopexp10, f_eco_gdpdefl_linked, f_eco_cpi, f_original_gdp, f_original_gini 
##   Dependent variable:       risktaking 
##   Models:                   lm 
##   Covariates:               no covariates, age_scale, COUNTRY, age_scale + COUNTRY 
##   Subsets analyses:         all 
## 
## Function used to extract parameters:
## 
##   function (x) 
## broom::tidy(x, conf.int = TRUE)
## <environment: 0x114f6ea58>
## 
## 
## Head of specifications table (first 6 rows):
## # A tibble: 6 × 6
##   x                              y          model controls       subsets formula
##   <chr>                          <chr>      <chr> <chr>          <chr>   <glue> 
## 1 f_inv_acctownership_primaryedu risktaking lm    no covariates  all     riskta…
## 2 f_inv_acctownership_primaryedu risktaking lm    age_scale      all     riskta…
## 3 f_inv_acctownership_primaryedu risktaking lm    COUNTRY        all     riskta…
## 4 f_inv_acctownership_primaryedu risktaking lm    age_scale + C… all     riskta…
## 5 f_oth_insfinsvcs_int           risktaking lm    no covariates  all     riskta…
## 6 f_oth_insfinsvcs_int           risktaking lm    age_scale      all     riskta…

Run specifications

# Fit every specification defined in the setup object
specification_results <- specr(specification)
# Auto-print the short overview of the fitted specifications
specification_results
## Models fitted based on 28 specifications
## Number of cores used: 1 
## 
## Descriptive summary of the specification curve:
## 
##  median  mad   min  max   q25  q75
##    0.24 1.18 -7.26 1.14 -3.88 0.73
# Detailed report of the fitted specifications, printed with five digits
summary(specification_results, digits = 5)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    35.232 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##  median     mad      min     max      q25     q75
##  0.2402 1.18008 -7.25607 1.14046 -3.87762 0.72707
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  225551 221536 225551
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 f_inv_accto… risk… lm    no cova… all     riskta…    0.224    0.0218     10.3 
## 2 f_inv_accto… risk… lm    age_sca… all     riskta…   -0.254    0.0215    -11.8 
## 3 f_inv_accto… risk… lm    COUNTRY  all     riskta…   -1.26     0.240      -5.27
## 4 f_inv_accto… risk… lm    age_sca… all     riskta…   -1.55     0.235      -6.57
## 5 f_oth_insfi… risk… lm    no cova… all     riskta…    0.506    0.0202     25.1 
## 6 f_oth_insfi… risk… lm    age_sca… all     riskta…    0.373    0.0196     19.0 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Summarizing the parameter distribution

# Descriptive statistics of the estimates across all specifications
summary(specification_results, type = "curve")
## # A tibble: 1 × 7
##   median   mad   min   max   q25   q75    obs
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1  0.240  1.18 -7.26  1.14 -3.88 0.727 225551
# Same curve summary, computed separately for each predictor (x)
summary(specification_results, 
        type = "curve", 
        group = "x",           
        stats = c("median", "mean", "min", "max"))  # statistics listed as one vector
## # A tibble: 7 × 6
##   x                              median   mean    min   max    obs
##   <chr>                           <dbl>  <dbl>  <dbl> <dbl>  <dbl>
## 1 f_eco_cpi                      -0.354 -0.313 -1.51  0.965 225551
## 2 f_eco_gdpdefl_linked           -1.71  -1.80  -4.50  0.712 221536
## 3 f_hs_oopexp10                  -2.46  -2.76  -6.92  0.785 225551
## 4 f_inv_acctownership_primaryedu -0.759 -0.710 -1.55  0.224 225551
## 5 f_original_gdp                 -2.73  -2.89  -7.26  1.14  225551
## 6 f_original_gini                 0.831  0.872  0.719 1.11  225551
## 7 f_oth_insfinsvcs_int           -2.20  -2.43  -5.84  0.506 225551

Plots

# Default combined specification curve plot
plot(specification_results)

# Panel a: specification curve with a ribbon instead of error bars
(a <- plot(specification_results, type = "curve", ci = FALSE, ribbon = TRUE) +
   geom_point(size = 4))

# Panel b: analytic choices behind each specification
(b <- plot(specification_results, type = "choices", choices = c("x", "y", "model", "controls")) +
   geom_point(size = 2, shape = 4))

# Panel c: sample size per specification. No ylim(0, 400) here: the models
# use more than 220,000 observations, so that limit would remove every bar.
(c <- plot(specification_results, type = "samplesizes"))

# Stack the three panels vertically with aligned axes
plot_grid(a, b, c, ncol = 1,
          align = "v",
          rel_heights = c(1.5, 2, 0.8),
          axis = "rbl")

# Distribution of the estimates per analytic choice
plot(specification_results, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

# Lade die notwendigen Pakete
library(dplyr)
library(flextable)
library(officer)

# Extract the per-specification results from the specr object
results_df <- specification_results$data

# Summarise the estimates per predictor (x) and covariate set (controls)
summary_table <- results_df %>%
  group_by(x, controls) %>%
  summarise(
    Median = round(median(estimate, na.rm = TRUE), 5),
    Min = round(min(estimate, na.rm = TRUE), 5),
    Max = round(max(estimate, na.rm = TRUE), 5),
    Q25 = round(quantile(estimate, 0.25, na.rm = TRUE, names = FALSE), 5),
    Q75 = round(quantile(estimate, 0.75, na.rm = TRUE, names = FALSE), 5),
    # Share of estimates with p < 0.05, in percent
    Significant_Perc = round(mean(p.value < 0.05, na.rm = TRUE) * 100, 5),
    # Return an ungrouped tibble and avoid the regrouping message
    .groups = "drop"
  ) %>%
  arrange(desc(Significant_Perc))  # highest share of significant estimates first

# Build a formatted flextable
summary_flextable <- flextable(summary_table) %>%
  theme_vanilla() %>%  # visual styling
  set_table_properties(width = 1, layout = "autofit") %>%
  align(align = "center", part = "all") %>%
  bold(part = "header") %>%
  autofit()

# Create a new Word document
doc <- read_docx()

# Insert the table into the document
doc <- body_add_flextable(doc, summary_flextable)

# Save the document
print(doc, target = "Specification_Summary_f_all.docx")

Subsetting data for males

# Specification grid for the finance indicators, restricted to rows with
# gender == 1 (labelled as males in this analysis)
specification_males <- setup(
  data = dplyr::filter(hardship_combined, gender == 1),
  x = c(
    "f_inv_acctownership_primaryedu",
    "f_oth_insfinsvcs_int",
    "f_hs_oopexp10",
    "f_eco_gdpdefl_linked",
    "f_eco_cpi",
    "f_original_gdp",
    "f_original_gini"
  ),
  y = "risktaking",
  model = "lm",
  controls = c("age_scale", "COUNTRY")
)

# Fit all specifications on the male subset
specification_results_males <- specr(specification_males)

# Print the detailed report of the results
summary(specification_results_males)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    18.776 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##  median mad   min max   q25  q75
##    0.31 1.2 -5.66 1.3 -2.25 0.71
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  119591 117485 119591
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 f_inv_accto… risk… lm    no cova… all     riskta…     0.27      0.03      9.14
## 2 f_inv_accto… risk… lm    age_sca… all     riskta…    -0.23      0.03     -7.77
## 3 f_inv_accto… risk… lm    COUNTRY  all     riskta…    -0.72      0.33     -2.16
## 4 f_inv_accto… risk… lm    age_sca… all     riskta…    -1.21      0.33     -3.69
## 5 f_oth_insfi… risk… lm    no cova… all     riskta…     0.61      0.03     21.8 
## 6 f_oth_insfi… risk… lm    age_sca… all     riskta…     0.49      0.03     18.0 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Plots for male subset results

# Default combined specification curve plot for the male subset
plot(specification_results_males)

# Panel a: specification curve with a ribbon instead of error bars
(a_male <- plot(specification_results_males, type = "curve", ci = FALSE, ribbon = TRUE) +
   geom_point(size = 4))

# Panel b: analytic choices behind each specification
(b_male <- plot(specification_results_males, type = "choices", choices = c("x", "y", "model", "controls")) +
   geom_point(size = 2, shape = 4))

# Panel c: sample size per specification. No ylim(0, 400) here: the models
# use more than 117,000 observations, so that limit would remove every bar.
(c_male <- plot(specification_results_males, type = "samplesizes"))

# Stack the three panels vertically with aligned axes
plot_grid(a_male, b_male, c_male, ncol = 1,
          align = "v",
          rel_heights = c(1.5, 2, 0.8),
          axis = "rbl")

# Distribution of the estimates per analytic choice
plot(specification_results_males, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

# Lade die notwendigen Pakete
library(dplyr)
library(flextable)
library(officer)

# Extract the per-specification results from the specr object
results_df <- specification_results_males$data

# Summarise the estimates per predictor (x) and covariate set (controls)
summary_table <- results_df %>%
  group_by(x, controls) %>%
  summarise(
    Median = round(median(estimate, na.rm = TRUE), 5),
    Min = round(min(estimate, na.rm = TRUE), 5),
    Max = round(max(estimate, na.rm = TRUE), 5),
    Q25 = round(quantile(estimate, 0.25, na.rm = TRUE, names = FALSE), 5),
    Q75 = round(quantile(estimate, 0.75, na.rm = TRUE, names = FALSE), 5),
    # Share of estimates with p < 0.05, in percent
    Significant_Perc = round(mean(p.value < 0.05, na.rm = TRUE) * 100, 5),
    # Return an ungrouped tibble and avoid the regrouping message
    .groups = "drop"
  ) %>%
  arrange(desc(Significant_Perc))  # highest share of significant estimates first

# Build a formatted flextable
summary_flextable <- flextable(summary_table) %>%
  theme_vanilla() %>%  # visual styling
  set_table_properties(width = 1, layout = "autofit") %>%
  align(align = "center", part = "all") %>%
  bold(part = "header") %>%
  autofit()

# Create a new Word document
doc <- read_docx()

# Insert the table into the document
doc <- body_add_flextable(doc, summary_flextable)

# Save the document
print(doc, target = "Specification_Summary_f_male.docx")

Subsetting data for females

# Specification grid for the finance indicators, restricted to rows with
# gender == 0 (labelled as females in this analysis)
specification_females <- setup(
  data = dplyr::filter(hardship_combined, gender == 0),
  x = c(
    "f_inv_acctownership_primaryedu",
    "f_oth_insfinsvcs_int",
    "f_hs_oopexp10",
    "f_eco_gdpdefl_linked",
    "f_eco_cpi",
    "f_original_gdp",
    "f_original_gini"
  ),
  y = "risktaking",
  model = "lm",
  controls = c("age_scale", "COUNTRY")
)

# Fit all specifications on the female subset
specification_results_females <- specr(specification_females)

# Print the detailed report of the results
summary(specification_results_females)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    16.321 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##  median mad   min  max   q25  q75
##    0.16 1.3 -9.36 1.14 -5.66 0.62
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  105960 104051 105960
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 f_inv_accto… risk… lm    no cova… all     riskta…     0.17      0.03      5.34
## 2 f_inv_accto… risk… lm    age_sca… all     riskta…    -0.28      0.03     -9   
## 3 f_inv_accto… risk… lm    COUNTRY  all     riskta…    -1.93      0.34     -5.67
## 4 f_inv_accto… risk… lm    age_sca… all     riskta…    -1.99      0.33     -5.98
## 5 f_oth_insfi… risk… lm    no cova… all     riskta…     0.38      0.03     13.4 
## 6 f_oth_insfi… risk… lm    age_sca… all     riskta…     0.24      0.03      8.5 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Plots for female subset results

# Default combined specification curve plot for the female subset
plot(specification_results_females)

# Panel a: specification curve with a ribbon instead of error bars
(a_female <- plot(specification_results_females, type = "curve", ci = FALSE, ribbon = TRUE) +
   geom_point(size = 4))

# Panel b: analytic choices behind each specification
(b_female <- plot(specification_results_females, type = "choices", choices = c("x", "y", "model", "controls")) +
   geom_point(size = 2, shape = 4))

# Panel c: sample size per specification. No ylim(0, 400) here: the models
# use more than 104,000 observations, so that limit would remove every bar.
(c_female <- plot(specification_results_females, type = "samplesizes"))

# Stack the three panels vertically with aligned axes
plot_grid(a_female, b_female, c_female, ncol = 1,
          align = "v",
          rel_heights = c(1.5, 2, 0.8),
          axis = "rbl")

# Distribution of the estimates per analytic choice
plot(specification_results_females, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

# Lade die notwendigen Pakete
library(dplyr)
library(flextable)
library(officer)

# Extract the per-specification results from the specr object
results_df <- specification_results_females$data

# Summarise the estimates per predictor (x) and covariate set (controls)
summary_table <- results_df %>%
  group_by(x, controls) %>%
  summarise(
    Median = round(median(estimate, na.rm = TRUE), 5),
    Min = round(min(estimate, na.rm = TRUE), 5),
    Max = round(max(estimate, na.rm = TRUE), 5),
    Q25 = round(quantile(estimate, 0.25, na.rm = TRUE, names = FALSE), 5),
    Q75 = round(quantile(estimate, 0.75, na.rm = TRUE, names = FALSE), 5),
    # Share of estimates with p < 0.05, in percent
    Significant_Perc = round(mean(p.value < 0.05, na.rm = TRUE) * 100, 5),
    # Return an ungrouped tibble and avoid the regrouping message
    .groups = "drop"
  ) %>%
  arrange(desc(Significant_Perc))  # highest share of significant estimates first

# Build a formatted flextable
summary_flextable <- flextable(summary_table) %>%
  theme_vanilla() %>%  # visual styling
  set_table_properties(width = 1, layout = "autofit") %>%
  align(align = "center", part = "all") %>%
  bold(part = "header") %>%
  autofit()

# Create a new Word document
doc <- read_docx()

# Insert the table into the document
doc <- body_add_flextable(doc, summary_flextable)

# Save the document
print(doc, target = "Specification_Summary_f_female.docx")

Subsetting data for age categories

#' Run the specification curve analysis for a single age group.
#'
#' Keeps the rows of `data` where `age_numeric == age_id`, fits one `lm`
#' per specification with specr, prints the detailed report once and builds
#' four plots (curve, choices, sample sizes, boxplot).
#'
#' @param data Data frame containing `risktaking`, `age_numeric`,
#'   `age_scale`, `COUNTRY` and every column named in `x_vars`.
#' @param age_id Value of `age_numeric` that identifies the age group.
#' @param age_label Label used in the console header and in the plot titles.
#' @param x_vars Character vector of predictor columns. Defaults to the
#'   finance hardship indicators.
#' @return A list with `summary` (the tibble returned by
#'   `summary(..., type = "curve")`) and `plots` (a named list of ggplot
#'   objects `plot_a` to `plot_d`).
run_specification_for_age <- function(
    data,
    age_id,
    age_label,
    x_vars = c(
      "f_inv_acctownership_primaryedu", "f_oth_insfinsvcs_int",
      "f_hs_oopexp10", "f_eco_gdpdefl_linked", "f_eco_cpi",
      "f_original_gdp", "f_original_gini"
    )) {
  # Restrict the data to the requested age group
  data_subset <- data %>%
    filter(age_numeric == age_id)

  # Fail early with a readable message instead of an obscure lm() error
  if (nrow(data_subset) == 0) {
    stop("No observations found for age group: ", age_label, call. = FALSE)
  }

  # Define the specification grid
  specification <- setup(
    data = data_subset,
    y = "risktaking",
    x = x_vars,
    controls = c("age_scale", "COUNTRY"),
    model = "lm"
  )

  # Fit all specifications
  specification_results <- specr(specification)

  # Print the detailed report under an age-group header. summary() on a
  # specr object prints its report and returns NULL, so it is called exactly
  # once and its return value is neither printed nor stored.
  cat("\nStatistische Ergebnisse für die Alterskategorie:", age_label, "\n")
  summary(specification_results, digits = 5)

  # The "curve" summary returns a tibble, which is what callers receive
  curve_summary <- summary(specification_results, type = "curve")

  # Build the plots for this age group
  plot_list <- list(
    plot_a = plot(specification_results, type = "curve", ci = FALSE, ribbon = TRUE) +
      geom_point(size = 4) +
      ggtitle(paste("Curve Plot -", age_label)),
    plot_b = plot(specification_results, type = "choices",
                  choices = c("x", "y", "model", "controls")) +
      geom_point(size = 2, shape = 4) +
      ggtitle(paste("Choices Plot -", age_label)),
    # No ylim(0, 400): the sample sizes are in the tens of thousands, so that
    # limit would remove every bar from the plot.
    plot_c = plot(specification_results, type = "samplesizes") +
      ggtitle(paste("Sample Sizes Plot -", age_label)),
    plot_d = plot(specification_results, type = "boxplot") +
      geom_point(alpha = .4) +
      scale_fill_brewer(palette = "Pastel2") +
      labs(x = "Effect size", fill = "") +
      ggtitle(paste("Boxplot -", age_label))
  )

  list(summary = curve_summary, plots = plot_list)
}

# Run the analysis for each of the four age groups and show the returned
# summary followed by the four plots
for (age_id in seq_len(4)) {
  results <- run_specification_for_age(
    hardship_combined, age_id, paste("Age Group", age_id)
  )
  print(results$summary)
  for (plot_name in c("plot_a", "plot_b", "plot_c", "plot_d")) {
    print(results$plots[[plot_name]])
  }
}
## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 1 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    6.977 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##    median     mad     min     max      q25     q75
##  -0.60605 2.00438 -9.8446 1.19341 -6.07943 0.23914
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   46135 45836 46135
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 f_inv_accto… risk… lm    no cova… all     riskta…   -0.883    0.0482    -18.3 
## 2 f_inv_accto… risk… lm    age_sca… all     riskta…   -0.885    0.0481    -18.4 
## 3 f_inv_accto… risk… lm    COUNTRY  all     riskta…   -2.10     0.369      -5.68
## 4 f_inv_accto… risk… lm    age_sca… all     riskta…   -2.09     0.369      -5.67
## 5 f_oth_insfi… risk… lm    no cova… all     riskta…    0.240    0.0401      5.98
## 6 f_oth_insfi… risk… lm    age_sca… all     riskta…    0.239    0.0401      5.96
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    6.977 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##    median     mad     min     max      q25     q75
##  -0.60605 2.00438 -9.8446 1.19341 -6.07943 0.23914
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   46135 45836 46135
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 f_inv_accto… risk… lm    no cova… all     riskta…   -0.883    0.0482    -18.3 
## 2 f_inv_accto… risk… lm    age_sca… all     riskta…   -0.885    0.0481    -18.4 
## 3 f_inv_accto… risk… lm    COUNTRY  all     riskta…   -2.10     0.369      -5.68
## 4 f_inv_accto… risk… lm    age_sca… all     riskta…   -2.09     0.369      -5.67
## 5 f_oth_insfi… risk… lm    no cova… all     riskta…    0.240    0.0401      5.98
## 6 f_oth_insfi… risk… lm    age_sca… all     riskta…    0.239    0.0401      5.96
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 2 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    14.254 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median    mad     min     max      q25     q75
##  0.05769 1.1533 -6.7836 0.90026 -4.06013 0.74847
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   93221 91915 93221
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 f_inv_accto… risk… lm    no cova… all     riskta…   -0.245    0.0332     -7.37
## 2 f_inv_accto… risk… lm    age_sca… all     riskta…   -0.321    0.0332     -9.68
## 3 f_inv_accto… risk… lm    COUNTRY  all     riskta…   -1.45     0.352      -4.11
## 4 f_inv_accto… risk… lm    age_sca… all     riskta…   -1.38     0.351      -3.94
## 5 f_oth_insfi… risk… lm    no cova… all     riskta…    0.616    0.0300     20.5 
## 6 f_oth_insfi… risk… lm    age_sca… all     riskta…    0.573    0.0299     19.2 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    14.254 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median    mad     min     max      q25     q75
##  0.05769 1.1533 -6.7836 0.90026 -4.06013 0.74847
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   93221 91915 93221
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 f_inv_accto… risk… lm    no cova… all     riskta…   -0.245    0.0332     -7.37
## 2 f_inv_accto… risk… lm    age_sca… all     riskta…   -0.321    0.0332     -9.68
## 3 f_inv_accto… risk… lm    COUNTRY  all     riskta…   -1.45     0.352      -4.11
## 4 f_inv_accto… risk… lm    age_sca… all     riskta…   -1.38     0.351      -3.94
## 5 f_oth_insfi… risk… lm    no cova… all     riskta…    0.616    0.0300     20.5 
## 6 f_oth_insfi… risk… lm    age_sca… all     riskta…    0.573    0.0299     19.2 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 3 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    10.032 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median    mad      min     max     q25     q75
##  0.31707 0.3747 -0.11337 0.96039 0.19798 0.78612
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   63153 61551 63153
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 f_inv_accto… risk… lm    no cova… all     riskta…   0.148     0.0400     3.70 
## 2 f_inv_accto… risk… lm    age_sca… all     riskta…   0.0960    0.0400     2.40 
## 3 f_inv_accto… risk… lm    COUNTRY  all     riskta…   0.199     0.731      0.272
## 4 f_inv_accto… risk… lm    age_sca… all     riskta…   0.0747    0.730      0.102
## 5 f_oth_insfi… risk… lm    no cova… all     riskta…   0.254     0.0401     6.34 
## 6 f_oth_insfi… risk… lm    age_sca… all     riskta…   0.251     0.0400     6.28 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    10.032 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median    mad      min     max     q25     q75
##  0.31707 0.3747 -0.11337 0.96039 0.19798 0.78612
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   63153 61551 63153
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 f_inv_accto… risk… lm    no cova… all     riskta…   0.148     0.0400     3.70 
## 2 f_inv_accto… risk… lm    age_sca… all     riskta…   0.0960    0.0400     2.40 
## 3 f_inv_accto… risk… lm    COUNTRY  all     riskta…   0.199     0.731      0.272
## 4 f_inv_accto… risk… lm    age_sca… all     riskta…   0.0747    0.730      0.102
## 5 f_oth_insfi… risk… lm    no cova… all     riskta…   0.254     0.0401     6.34 
## 6 f_oth_insfi… risk… lm    age_sca… all     riskta…   0.251     0.0400     6.28 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 4 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    3.959 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##  median     mad      min      max     q25      q75
##  1.1642 2.59842 -2.09797 17.30643 0.33954 10.49142
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   23042 22234 23042
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 f_inv_accto… risk… lm    no cova… all     riskta…  0.409      0.0717    5.70  
## 2 f_inv_accto… risk… lm    age_sca… all     riskta…  0.401      0.0717    5.60  
## 3 f_inv_accto… risk… lm    COUNTRY  all     riskta…  3.59       1.99      1.81  
## 4 f_inv_accto… risk… lm    age_sca… all     riskta…  3.69       1.98      1.86  
## 5 f_oth_insfi… risk… lm    no cova… all     riskta… -0.00598    0.0677   -0.0884
## 6 f_oth_insfi… risk… lm    age_sca… all     riskta… -0.00575    0.0676   -0.085 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    3.959 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##  median     mad      min      max     q25      q75
##  1.1642 2.59842 -2.09797 17.30643 0.33954 10.49142
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   23042 22234 23042
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 f_inv_accto… risk… lm    no cova… all     riskta…  0.409      0.0717    5.70  
## 2 f_inv_accto… risk… lm    age_sca… all     riskta…  0.401      0.0717    5.60  
## 3 f_inv_accto… risk… lm    COUNTRY  all     riskta…  3.59       1.99      1.81  
## 4 f_inv_accto… risk… lm    age_sca… all     riskta…  3.69       1.98      1.86  
## 5 f_oth_insfi… risk… lm    no cova… all     riskta… -0.00598    0.0677   -0.0884
## 6 f_oth_insfi… risk… lm    age_sca… all     riskta… -0.00575    0.0676   -0.085 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

Hardship Crime

Setup for specifications

library(specr)

# Specification setup for the crime-related hardship indicators: each
# predictor is combined with each combination of the control variables.
# NOTE(review): if the c_* indicators are constant within a country, they are
# collinear with the COUNTRY control — confirm the estimates of those
# specifications are interpretable.
specification <- setup(
  data = hardship_combined,
  model = "lm",
  y = "risktaking", # dependent variable
  x = c(
    "c_bh_homicide", "c_bh_childmalt", "c_bh_violextchildprot",
    "c_bh_parviolenceprog", "c_bh_elderabuse", "c_theft_estcorruption",
    "c_oth_polstab"
  ),
  controls = c("age_scale", "COUNTRY")
)

# Overview of the resulting specifications
summary(specification)
## Setup for the Specification Curve Analysis
## -------------------------------------------
## Class:                      specr.setup -- version: 1.0.1 
## Number of specifications:   28 
## 
## Specifications:
## 
##   Independent variable:     c_bh_homicide, c_bh_childmalt, c_bh_violextchildprot, c_bh_parviolenceprog, c_bh_elderabuse, c_theft_estcorruption, c_oth_polstab 
##   Dependent variable:       risktaking 
##   Models:                   lm 
##   Covariates:               no covariates, age_scale, COUNTRY, age_scale + COUNTRY 
##   Subsets analyses:         all 
## 
## Function used to extract parameters:
## 
##   function (x) 
## broom::tidy(x, conf.int = TRUE)
## <environment: 0x1662b14a8>
## 
## 
## Head of specifications table (first 6 rows):
## # A tibble: 6 × 6
##   x              y          model controls            subsets formula           
##   <chr>          <chr>      <chr> <chr>               <chr>   <glue>            
## 1 c_bh_homicide  risktaking lm    no covariates       all     risktaking ~ c_bh…
## 2 c_bh_homicide  risktaking lm    age_scale           all     risktaking ~ c_bh…
## 3 c_bh_homicide  risktaking lm    COUNTRY             all     risktaking ~ c_bh…
## 4 c_bh_homicide  risktaking lm    age_scale + COUNTRY all     risktaking ~ c_bh…
## 5 c_bh_childmalt risktaking lm    no covariates       all     risktaking ~ c_bh…
## 6 c_bh_childmalt risktaking lm    age_scale           all     risktaking ~ c_bh…

Run specifications

# Fit the models defined by the specification setup
specification_results <- specr(specification)
# Printing the object shows a short descriptive summary of the curve
specification_results
## Models fitted based on 28 specifications
## Number of cores used: 1 
## 
## Descriptive summary of the specification curve:
## 
##  median mad   min  max   q25  q75
##    0.49 0.8 -16.8 5.32 -1.54 0.94
# Detailed report of the fitted specifications, shown with 5 digits
summary(specification_results, digits = 5)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    39.672 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad       min     max      q25     q75
##  0.49293 0.79936 -16.80179 5.31884 -1.54067 0.93991
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  225551 225551 225551
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 c_bh_homici… risk… lm    no cova… all     riskta…    0.947    0.0204     46.5 
## 2 c_bh_homici… risk… lm    age_sca… all     riskta…    0.553    0.0201     27.4 
## 3 c_bh_homici… risk… lm    COUNTRY  all     riskta…    4.35     0.826       5.27
## 4 c_bh_homici… risk… lm    age_sca… all     riskta…    5.32     0.810       6.57
## 5 c_bh_childm… risk… lm    no cova… all     riskta…    1.01     0.0221     45.7 
## 6 c_bh_childm… risk… lm    age_sca… all     riskta…    0.644    0.0218     29.5 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Summarizing the parameter distribution

# Descriptive statistics of the estimates across all specifications
summary(specification_results, type = "curve")
## # A tibble: 1 × 7
##   median   mad   min   max   q25   q75    obs
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1  0.493 0.799 -16.8  5.32 -1.54 0.940 225551
# Same descriptive statistics, computed separately for each predictor (x)
summary(specification_results, 
        type = "curve", 
        group = "x",           
        stats = c("median", "mean", "min", "max"))  # statistics listed in one vector
## # A tibble: 7 × 6
##   x                     median   mean     min   max    obs
##   <chr>                  <dbl>  <dbl>   <dbl> <dbl>  <dbl>
## 1 c_bh_childmalt        -0.409 -0.398  -1.79  1.01  225551
## 2 c_bh_elderabuse       -1.86  -1.88   -4.75  0.939 225551
## 3 c_bh_homicide          2.65   2.79    0.553 5.32  225551
## 4 c_bh_parviolenceprog  -0.697 -0.665  -1.70  0.433 225551
## 5 c_bh_violextchildprot  0.997  0.971   0.739 1.15  225551
## 6 c_oth_polstab         -0.441 -0.375  -1.51  0.897 225551
## 7 c_theft_estcorruption -6.81  -7.43  -16.8   0.685 225551

Plots

# Default specr plot of the results
plot(specification_results)

# Panel a: specification curve with enlarged points
(a <- plot(specification_results, type = "curve", ci = FALSE, ribbon = TRUE) +
   geom_point(size = 4))

# Panel b: analytic choices behind each specification
(b <- plot(specification_results, type = "choices",
           choices = c("x", "y", "model", "controls")) +
   geom_point(size = 2, shape = 4))

# Panel c: sample size per specification. No fixed y limit: every
# specification has 225551 observations, and ylim(0, 400) would drop all
# bars as out of bounds.
(c <- plot(specification_results, type = "samplesizes"))

# Stack the three panels vertically with aligned axes
plot_grid(a, b, c, ncol = 1,
          align = "v",
          rel_heights = c(1.5, 2, 0.8),
          axis = "rbl")

# Boxplot of the estimates
plot(specification_results, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

# Load the packages needed for the table export
library(dplyr)
library(flextable)
library(officer)

# Extract the per-specification results from the specr object
results_df <- specification_results$data

# Summarise the estimates per predictor (x) and control set (controls).
# NOTE(review): the setup yields 28 specifications (7 predictors x 4 control
# sets), so each x/controls group appears to hold a single estimate and
# Median/Min/Max/Q25/Q75 coincide — confirm this grouping is intended.
summary_table <- results_df %>%
  group_by(x, controls) %>%
  summarise(
    Median = round(median(estimate, na.rm = TRUE), 5),
    Min = round(min(estimate, na.rm = TRUE), 5),
    Max = round(max(estimate, na.rm = TRUE), 5),
    Q25 = round(quantile(estimate, 0.25, na.rm = TRUE), 5),
    Q75 = round(quantile(estimate, 0.75, na.rm = TRUE), 5),
    # Share of specifications with p < 0.05, in percent
    Significant_Perc = round(mean(p.value < 0.05, na.rm = TRUE) * 100, 5),
    # Return an ungrouped table and avoid the regrouping message
    .groups = "drop"
  ) %>%
  arrange(desc(Significant_Perc))  # most frequently significant first

# Build a formatted flextable
summary_flextable <- flextable(summary_table) %>%
  theme_vanilla() %>%
  set_table_properties(width = 1, layout = "autofit") %>%
  align(align = "center", part = "all") %>%
  bold(part = "header") %>%
  autofit()

# Create a new Word document
doc <- read_docx()

# Insert the table into the document
doc <- body_add_flextable(doc, summary_flextable)

# Save the document (path is relative to the working directory)
print(doc, target = "Specification_Summary_c_all.docx")

Subsetting data for males

# Specification setup restricted to rows with gender == 1 (males)
specification_males <- setup(
  data = filter(hardship_combined, gender == 1),
  model = "lm",
  y = "risktaking",
  x = c(
    "c_bh_homicide", "c_bh_childmalt", "c_bh_violextchildprot",
    "c_bh_parviolenceprog", "c_bh_elderabuse", "c_theft_estcorruption",
    "c_oth_polstab"
  ),
  controls = c("age_scale", "COUNTRY")
)

# Fit all specifications on the male subsample
specification_results_males <- specr(specification_males)

# Report the results
summary(specification_results_males)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    19.602 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##  median  mad    min  max   q25  q75
##    0.48 0.82 -13.11 4.15 -1.23 0.92
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  119591 119591 119591
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 c_bh_homici… risk… lm    no cova… all     riskta…     1.03      0.03     37.4 
## 2 c_bh_homici… risk… lm    age_sca… all     riskta…     0.65      0.03     24.0 
## 3 c_bh_homici… risk… lm    COUNTRY  all     riskta…     2.48      1.15      2.16
## 4 c_bh_homici… risk… lm    age_sca… all     riskta…     4.15      1.12      3.69
## 5 c_bh_childm… risk… lm    no cova… all     riskta…     1.04      0.03     34.4 
## 6 c_bh_childm… risk… lm    age_sca… all     riskta…     0.68      0.03     22.9 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Plots for male subset results

# Default specr plot of the male-subsample results
plot(specification_results_males)

# Panel a: specification curve with enlarged points
(a_male <- plot(specification_results_males, type = "curve",
                ci = FALSE, ribbon = TRUE) +
   geom_point(size = 4))

# Panel b: analytic choices behind each specification
(b_male <- plot(specification_results_males, type = "choices",
                choices = c("x", "y", "model", "controls")) +
   geom_point(size = 2, shape = 4))

# Panel c: sample size per specification. No fixed y limit: every
# specification has 119591 observations, and ylim(0, 400) would drop all
# bars as out of bounds.
(c_male <- plot(specification_results_males, type = "samplesizes"))

# Stack the three panels vertically with aligned axes
plot_grid(a_male, b_male, c_male, ncol = 1,
          align = "v",
          rel_heights = c(1.5, 2, 0.8),
          axis = "rbl")

# Boxplot of the estimates
plot(specification_results_males, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

# Load the packages needed for the table export
library(dplyr)
library(flextable)
library(officer)

# Extract the per-specification results from the specr object
results_df <- specification_results_males$data

# Summarise the estimates per predictor (x) and control set (controls).
# NOTE(review): the run has 28 specifications (7 predictors x 4 control
# sets), so each x/controls group appears to hold a single estimate and
# Median/Min/Max/Q25/Q75 coincide — confirm this grouping is intended.
summary_table <- results_df %>%
  group_by(x, controls) %>%
  summarise(
    Median = round(median(estimate, na.rm = TRUE), 5),
    Min = round(min(estimate, na.rm = TRUE), 5),
    Max = round(max(estimate, na.rm = TRUE), 5),
    Q25 = round(quantile(estimate, 0.25, na.rm = TRUE), 5),
    Q75 = round(quantile(estimate, 0.75, na.rm = TRUE), 5),
    # Share of specifications with p < 0.05, in percent
    Significant_Perc = round(mean(p.value < 0.05, na.rm = TRUE) * 100, 5),
    # Return an ungrouped table and avoid the regrouping message
    .groups = "drop"
  ) %>%
  arrange(desc(Significant_Perc))  # most frequently significant first

# Build a formatted flextable
summary_flextable <- flextable(summary_table) %>%
  theme_vanilla() %>%
  set_table_properties(width = 1, layout = "autofit") %>%
  align(align = "center", part = "all") %>%
  bold(part = "header") %>%
  autofit()

# Create a new Word document
doc <- read_docx()

# Insert the table into the document
doc <- body_add_flextable(doc, summary_flextable)

# Save the document (path is relative to the working directory)
print(doc, target = "Specification_Summary_c_male.docx")

Subsetting data for females

# Specification setup restricted to rows with gender == 0 (females)
specification_females <- setup(
  data = filter(hardship_combined, gender == 0),
  model = "lm",
  y = "risktaking",
  x = c(
    "c_bh_homicide", "c_bh_childmalt", "c_bh_violextchildprot",
    "c_bh_parviolenceprog", "c_bh_elderabuse", "c_theft_estcorruption",
    "c_oth_polstab"
  ),
  controls = c("age_scale", "COUNTRY")
)

# Fit all specifications on the female subsample
specification_results_females <- specr(specification_females)

# Report the results
summary(specification_results_females)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    16.606 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##  median  mad    min  max   q25  q75
##    0.43 1.12 -21.68 6.86 -1.91 0.86
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  105960 105960 105960
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 c_bh_homici… risk… lm    no cova… all     riskta…     0.85      0.03     28.5 
## 2 c_bh_homici… risk… lm    age_sca… all     riskta…     0.44      0.03     14.9 
## 3 c_bh_homici… risk… lm    COUNTRY  all     riskta…     6.66      1.17      5.67
## 4 c_bh_homici… risk… lm    age_sca… all     riskta…     6.86      1.15      5.98
## 5 c_bh_childm… risk… lm    no cova… all     riskta…     0.93      0.03     29.1 
## 6 c_bh_childm… risk… lm    age_sca… all     riskta…     0.56      0.03     17.7 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Plots for female subset results

# Default specr plot of the female-subsample results
plot(specification_results_females)

# Panel a: specification curve with enlarged points
(a_female <- plot(specification_results_females, type = "curve",
                  ci = FALSE, ribbon = TRUE) +
   geom_point(size = 4))

# Panel b: analytic choices behind each specification
(b_female <- plot(specification_results_females, type = "choices",
                  choices = c("x", "y", "model", "controls")) +
   geom_point(size = 2, shape = 4))

# Panel c: sample size per specification. No fixed y limit: every
# specification has 105960 observations, and ylim(0, 400) would drop all
# bars as out of bounds.
(c_female <- plot(specification_results_females, type = "samplesizes"))

# Stack the three panels vertically with aligned axes
plot_grid(a_female, b_female, c_female, ncol = 1,
          align = "v",
          rel_heights = c(1.5, 2, 0.8),
          axis = "rbl")

# Boxplot of the estimates
plot(specification_results_females, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

# Load the packages needed for the table export
library(dplyr)
library(flextable)
library(officer)

# Extract the per-specification results from the specr object
results_df <- specification_results_females$data

# Summarise the estimates per predictor (x) and control set (controls).
# NOTE(review): the run has 28 specifications (7 predictors x 4 control
# sets), so each x/controls group appears to hold a single estimate and
# Median/Min/Max/Q25/Q75 coincide — confirm this grouping is intended.
summary_table <- results_df %>%
  group_by(x, controls) %>%
  summarise(
    Median = round(median(estimate, na.rm = TRUE), 5),
    Min = round(min(estimate, na.rm = TRUE), 5),
    Max = round(max(estimate, na.rm = TRUE), 5),
    Q25 = round(quantile(estimate, 0.25, na.rm = TRUE), 5),
    Q75 = round(quantile(estimate, 0.75, na.rm = TRUE), 5),
    # Share of specifications with p < 0.05, in percent
    Significant_Perc = round(mean(p.value < 0.05, na.rm = TRUE) * 100, 5),
    # Return an ungrouped table and avoid the regrouping message
    .groups = "drop"
  ) %>%
  arrange(desc(Significant_Perc))  # most frequently significant first

# Build a formatted flextable
summary_flextable <- flextable(summary_table) %>%
  theme_vanilla() %>%
  set_table_properties(width = 1, layout = "autofit") %>%
  align(align = "center", part = "all") %>%
  bold(part = "header") %>%
  autofit()

# Create a new Word document
doc <- read_docx()

# Insert the table into the document
doc <- body_add_flextable(doc, summary_flextable)

# Save the document (path is relative to the working directory)
print(doc, target = "Specification_Summary_c_female.docx")

Subsetting data for age categories

# Run a specification curve analysis for one age category.
#
# Arguments:
#   data       Data frame with the columns `risktaking`, `age_numeric` and
#              every column named in `x_vars` and `controls`.
#   age_id     Value of `age_numeric` that selects the age category.
#   age_label  Label used in the console header and in the plot titles.
#   x_vars     Predictor column names (defaults to the crime indicators).
#   controls   Control column names handed to setup().
#
# Returns a list with two elements:
#   summary  descriptive statistics of the curve, as returned by
#            summary(..., type = "curve")
#   plots    named list of ggplot objects: plot_a (curve), plot_b (choices),
#            plot_c (sample sizes), plot_d (boxplot)
#
# Side effect: prints a header line and the detailed summary report once.
run_specification_for_age <- function(
    data, age_id, age_label,
    x_vars = c("c_bh_homicide", "c_bh_childmalt", "c_bh_violextchildprot",
               "c_bh_parviolenceprog", "c_bh_elderabuse",
               "c_theft_estcorruption", "c_oth_polstab"),
    controls = c("age_scale", "COUNTRY")) {
  # Keep only the rows of the requested age category
  data_subset <- data %>%
    filter(age_numeric == age_id)

  # Fail early with a readable message instead of an obscure lm() error
  if (nrow(data_subset) == 0) {
    stop("No rows with age_numeric == ", age_id, " (", age_label, ")",
         call. = FALSE)
  }

  # Set up and fit all specifications
  specification <- setup(
    data = data_subset,
    y = "risktaking",
    x = x_vars,
    controls = controls,
    model = "lm"
  )
  specification_results <- specr(specification)

  # Print the detailed report exactly once. The default summary method writes
  # the report itself and returns NULL, so it is neither wrapped in print()
  # nor used as the return value.
  cat("\nStatistische Ergebnisse für die Alterskategorie:", age_label, "\n")
  summary(specification_results, digits = 5)

  # Build the plots for this age category
  plot_list <- list(
    plot_a = plot(specification_results, type = "curve",
                  ci = FALSE, ribbon = TRUE) +
      geom_point(size = 4) +
      ggtitle(paste("Curve Plot -", age_label)),
    plot_b = plot(specification_results, type = "choices",
                  choices = c("x", "y", "model", "controls")) +
      geom_point(size = 2, shape = 4) +
      ggtitle(paste("Choices Plot -", age_label)),
    # No fixed y limit: the samples hold tens of thousands of observations,
    # and ylim(0, 400) would drop every bar as out of bounds.
    plot_c = plot(specification_results, type = "samplesizes") +
      ggtitle(paste("Sample Sizes Plot -", age_label)),
    plot_d = plot(specification_results, type = "boxplot") +
      geom_point(alpha = .4) +
      scale_fill_brewer(palette = "Pastel2") +
      labs(x = "Effect size", fill = "") +
      ggtitle(paste("Boxplot -", age_label))
  )

  # type = "curve" returns a tibble, so callers get a usable object
  list(
    summary = summary(specification_results, type = "curve"),
    plots = plot_list
  )
}

# Run the analysis for each of the four age groups and show the returned
# summary followed by the four plots
for (age_id in seq_len(4)) {
  results <- run_specification_for_age(
    hardship_combined, age_id, paste("Age Group", age_id)
  )
  print(results$summary)
  for (plot_name in c("plot_a", "plot_b", "plot_c", "plot_d")) {
    print(results$plots[[plot_name]])
  }
}
## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 1 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    7.158 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##    median     mad       min     max      q25     q75
##  -0.19242 1.66845 -22.79565 7.21629 -2.04927 0.40071
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   46135 46135 46135
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 c_bh_homici… risk… lm    no cova… all     riskta…  -0.0555    0.0471     -1.18
## 2 c_bh_homici… risk… lm    age_sca… all     riskta…  -0.0731    0.0472     -1.55
## 3 c_bh_homici… risk… lm    COUNTRY  all     riskta…   7.22      1.27        5.68
## 4 c_bh_homici… risk… lm    age_sca… all     riskta…   7.20      1.27        5.67
## 5 c_bh_childm… risk… lm    no cova… all     riskta…   0.504     0.0479     10.5 
## 6 c_bh_childm… risk… lm    age_sca… all     riskta…   0.500     0.0479     10.4 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    7.158 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##    median     mad       min     max      q25     q75
##  -0.19242 1.66845 -22.79565 7.21629 -2.04927 0.40071
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   46135 46135 46135
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 c_bh_homici… risk… lm    no cova… all     riskta…  -0.0555    0.0471     -1.18
## 2 c_bh_homici… risk… lm    age_sca… all     riskta…  -0.0731    0.0472     -1.55
## 3 c_bh_homici… risk… lm    COUNTRY  all     riskta…   7.22      1.27        5.68
## 4 c_bh_homici… risk… lm    age_sca… all     riskta…   7.20      1.27        5.67
## 5 c_bh_childm… risk… lm    no cova… all     riskta…   0.504     0.0479     10.5 
## 6 c_bh_childm… risk… lm    age_sca… all     riskta…   0.500     0.0479     10.4 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 2 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    14.471 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##  median     mad       min     max      q25     q75
##  0.3591 0.91813 -15.70777 4.97252 -1.60268 0.82321
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   93221 93221 93221
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 c_bh_homici… risk… lm    no cova… all     riskta…    0.768    0.0321     24.0 
## 2 c_bh_homici… risk… lm    age_sca… all     riskta…    0.688    0.0321     21.4 
## 3 c_bh_homici… risk… lm    COUNTRY  all     riskta…    4.97     1.21        4.11
## 4 c_bh_homici… risk… lm    age_sca… all     riskta…    4.76     1.21        3.94
## 5 c_bh_childm… risk… lm    no cova… all     riskta…    0.871    0.0341     25.5 
## 6 c_bh_childm… risk… lm    age_sca… all     riskta…    0.808    0.0340     23.7 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    14.471 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##  median     mad       min     max      q25     q75
##  0.3591 0.91813 -15.70777 4.97252 -1.60268 0.82321
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   93221 93221 93221
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 c_bh_homici… risk… lm    no cova… all     riskta…    0.768    0.0321     24.0 
## 2 c_bh_homici… risk… lm    age_sca… all     riskta…    0.688    0.0321     21.4 
## 3 c_bh_homici… risk… lm    COUNTRY  all     riskta…    4.97     1.21        4.11
## 4 c_bh_homici… risk… lm    age_sca… all     riskta…    4.76     1.21        3.94
## 5 c_bh_childm… risk… lm    no cova… all     riskta…    0.871    0.0341     25.5 
## 6 c_bh_childm… risk… lm    age_sca… all     riskta…    0.808    0.0340     23.7 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 3 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    9.682 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad      min     max      q25     q75
##  0.34193 0.58268 -6.96597 2.16543 -0.07882 0.71764
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   63153 63153 63153
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 c_bh_homici… risk… lm    no cova… all     riskta…    0.755    0.0365    20.7  
## 2 c_bh_homici… risk… lm    age_sca… all     riskta…    0.713    0.0364    19.6  
## 3 c_bh_homici… risk… lm    COUNTRY  all     riskta…   -0.686    2.52      -0.272
## 4 c_bh_homici… risk… lm    age_sca… all     riskta…   -0.257    2.51      -0.102
## 5 c_bh_childm… risk… lm    no cova… all     riskta…    0.582    0.0412    14.1  
## 6 c_bh_childm… risk… lm    age_sca… all     riskta…    0.544    0.0412    13.2  
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    9.682 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad      min     max      q25     q75
##  0.34193 0.58268 -6.96597 2.16543 -0.07882 0.71764
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   63153 63153 63153
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 c_bh_homici… risk… lm    no cova… all     riskta…    0.755    0.0365    20.7  
## 2 c_bh_homici… risk… lm    age_sca… all     riskta…    0.713    0.0364    19.6  
## 3 c_bh_homici… risk… lm    COUNTRY  all     riskta…   -0.686    2.52      -0.272
## 4 c_bh_homici… risk… lm    age_sca… all     riskta…   -0.257    2.51      -0.102
## 5 c_bh_childm… risk… lm    no cova… all     riskta…    0.582    0.0412    14.1  
## 6 c_bh_childm… risk… lm    age_sca… all     riskta…    0.544    0.0412    13.2  
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 4 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    3.719 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad       min      max     q25     q75
##  0.71243 1.65731 -12.68596 40.07391 0.30351 2.54795
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   23042 23042 23042
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 c_bh_homici… risk… lm    no cova… all     riskta…    0.644    0.0580     11.1 
## 2 c_bh_homici… risk… lm    age_sca… all     riskta…    0.645    0.0580     11.1 
## 3 c_bh_homici… risk… lm    COUNTRY  all     riskta…  -12.3      6.83       -1.81
## 4 c_bh_homici… risk… lm    age_sca… all     riskta…  -12.7      6.83       -1.86
## 5 c_bh_childm… risk… lm    no cova… all     riskta…    0.509    0.0677      7.52
## 6 c_bh_childm… risk… lm    age_sca… all     riskta…    0.503    0.0676      7.43
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    3.719 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad       min      max     q25     q75
##  0.71243 1.65731 -12.68596 40.07391 0.30351 2.54795
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   23042 23042 23042
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 c_bh_homici… risk… lm    no cova… all     riskta…    0.644    0.0580     11.1 
## 2 c_bh_homici… risk… lm    age_sca… all     riskta…    0.645    0.0580     11.1 
## 3 c_bh_homici… risk… lm    COUNTRY  all     riskta…  -12.3      6.83       -1.81
## 4 c_bh_homici… risk… lm    age_sca… all     riskta…  -12.7      6.83       -1.86
## 5 c_bh_childm… risk… lm    no cova… all     riskta…    0.509    0.0677      7.52
## 6 c_bh_childm… risk… lm    age_sca… all     riskta…    0.503    0.0676      7.43
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

Hardship Environment

Setup for specifications

library(specr)

# Specification grid for the environment-hardship indicators: every predictor
# in `x` is combined with every covariate set built from `controls`
# (none, age_scale, COUNTRY, age_scale + COUNTRY) in a linear model.
specification <- hardship_combined %>%
  setup(
    y = "risktaking",  # dependent variable
    x = c(
      "e_oth_drinkingwater",
      "e_exp_watersanithyg100k",
      "e_ses_gini",
      "e_ses_school",
      "e_exp_disaster",
      "e_exp_airdeath100k",
      "e_exp_watersanithyg"
    ),
    controls = c("age_scale", "COUNTRY"),
    model = "lm"
  )

# Overview of the generated specifications
summary(specification)
## Setup for the Specification Curve Analysis
## -------------------------------------------
## Class:                      specr.setup -- version: 1.0.1 
## Number of specifications:   28 
## 
## Specifications:
## 
##   Independent variable:     e_oth_drinkingwater, e_exp_watersanithyg100k, e_ses_gini, e_ses_school, e_exp_disaster, e_exp_airdeath100k, e_exp_watersanithyg 
##   Dependent variable:       risktaking 
##   Models:                   lm 
##   Covariates:               no covariates, age_scale, COUNTRY, age_scale + COUNTRY 
##   Subsets analyses:         all 
## 
## Function used to extract parameters:
## 
##   function (x) 
## broom::tidy(x, conf.int = TRUE)
## <environment: 0x141326708>
## 
## 
## Head of specifications table (first 6 rows):
## # A tibble: 6 × 6
##   x                       y          model controls            subsets formula  
##   <chr>                   <chr>      <chr> <chr>               <chr>   <glue>   
## 1 e_oth_drinkingwater     risktaking lm    no covariates       all     risktaki…
## 2 e_oth_drinkingwater     risktaking lm    age_scale           all     risktaki…
## 3 e_oth_drinkingwater     risktaking lm    COUNTRY             all     risktaki…
## 4 e_oth_drinkingwater     risktaking lm    age_scale + COUNTRY all     risktaki…
## 5 e_exp_watersanithyg100k risktaking lm    no covariates       all     risktaki…
## 6 e_exp_watersanithyg100k risktaking lm    age_scale           all     risktaki…

Run specifications

# Fit all models defined in `specification`, then print the compact overview
# of the resulting specification curve.
specification_results <- specr(specification)
specification_results
## Models fitted based on 28 specifications
## Number of cores used: 1 
## 
## Descriptive summary of the specification curve:
## 
##  median  mad    min  max   q25  q75
##    0.09 1.35 -35.27 1.31 -2.02 0.77
# Detailed report of the fitted specifications, printed with 5 digits.
summary(specification_results, digits = 5)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    38.497 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad       min     max      q25     q75
##  0.09447 1.35362 -35.27253 1.31413 -2.01908 0.76561
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  224550 224550 224550
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 e_oth_drink… risk… lm    no cova… all     riskta…    1.23     0.0253     48.6 
## 2 e_oth_drink… risk… lm    age_sca… all     riskta…    0.612    0.0253     24.2 
## 3 e_oth_drink… risk… lm    COUNTRY  all     riskta…   -1.38     0.262      -5.26
## 4 e_oth_drink… risk… lm    age_sca… all     riskta…   -1.68     0.257      -6.57
## 5 e_exp_water… risk… lm    no cova… all     riskta…    1.31     0.0222     59.1 
## 6 e_exp_water… risk… lm    age_sca… all     riskta…    0.728    0.0224     32.5 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Summarizing the parameter distribution

# Descriptive statistics of the estimates across all specifications.
summary(specification_results, type = "curve")
## # A tibble: 1 × 7
##   median   mad   min   max   q25   q75    obs
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 0.0945  1.35 -35.3  1.31 -2.02 0.766 224550
# Descriptive statistics of the estimates, computed separately for each
# predictor (`x`); the requested statistics are passed as one character vector.
specification_results %>%
  summary(
    type = "curve",
    group = "x",
    stats = c("median", "mean", "min", "max")
  )
## # A tibble: 7 × 6
##   x                         median     mean     min     max    obs
##   <chr>                      <dbl>    <dbl>   <dbl>   <dbl>  <dbl>
## 1 e_exp_airdeath100k      -10.6    -11.7    -26.7    1.17   224550
## 2 e_exp_disaster           -1.52    -1.66    -3.51  -0.0698 224550
## 3 e_exp_watersanithyg     -14.2    -15.7    -35.3    0.903  224550
## 4 e_exp_watersanithyg100k  -0.592   -0.552   -2.34   1.31   224550
## 5 e_oth_drinkingwater      -0.383   -0.306   -1.68   1.23   224550
## 6 e_ses_gini                0.910    0.884    0.642  1.07   224550
## 7 e_ses_school             -0.0286   0.0475  -0.386  0.634  224550

Plots

# Default specr plot of the results
plot(specification_results)

# Panel A: specification curve without confidence-interval bars, with ribbon
(a <- plot(specification_results, type = "curve", ci = FALSE, ribbon = TRUE) +
   geom_point(size = 4))

# Panel B: analytic choices behind each specification
(b <- plot(specification_results, type = "choices", choices = c("x", "y", "model", "controls")) +
   geom_point(size = 2, shape = 4))

# Panel C: sample size per specification. The upper limit is left open (NA):
# every model here uses far more than 400 observations, so a fixed
# ylim(0, 400) would censor all bars and leave the panel empty.
(c <- plot(specification_results, type = "samplesizes") + ylim(0, NA))

# Stack the three panels vertically with aligned axes
plot_grid(a, b, c, ncol = 1,
          align = "v",
          rel_heights = c(1.5, 2, 0.8),
          axis = "rbl")

# Distribution of the estimates as boxplots with the individual points overlaid
plot(specification_results, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

# Load the required packages
library(dplyr)
library(flextable)
library(officer)

# Extract the per-specification results from the specr object
results_df <- specification_results$data

# Summarise the estimates per predictor (x) and covariate set (controls).
# NOTE(review): with a single model and subset, each (x, controls) pair
# appears to correspond to exactly one specification (7 predictors x 4
# covariate sets = 28), so Median/Min/Max/Q25/Q75 coincide and
# Significant_Perc is either 0 or 100 - confirm this grouping is intended.
summary_table <- results_df %>%
  group_by(x, controls) %>%
  summarise(
    Median = round(median(estimate, na.rm = TRUE), 5),
    Min = round(min(estimate, na.rm = TRUE), 5),
    Max = round(max(estimate, na.rm = TRUE), 5),
    Q25 = round(quantile(estimate, 0.25, na.rm = TRUE), 5),
    Q75 = round(quantile(estimate, 0.75, na.rm = TRUE), 5),
    # Percentage of estimates with p < 0.05
    Significant_Perc = round(mean(p.value < 0.05, na.rm = TRUE) * 100, 5),
    # Return an ungrouped tibble and avoid the regrouping message
    .groups = "drop"
  ) %>%
  arrange(desc(Significant_Perc))  # sort by share of significant estimates

# Build a formatted flextable
summary_flextable <- flextable(summary_table) %>%
  theme_vanilla() %>%  # visual styling
  set_table_properties(width = 1, layout = "autofit") %>%
  align(align = "center", part = "all") %>%
  bold(part = "header") %>%
  autofit()

# Create a new Word document
doc <- read_docx()

# Add the table to the document
doc <- body_add_flextable(doc, summary_flextable)

# Save the document (path is relative to the current working directory)
print(doc, target = "Specification_Summary_e_all.docx")

Subsetting data for males

# Same specification grid as for the full sample, restricted to the rows
# with gender == 1 (males).
specification_males <- hardship_combined %>%
  filter(gender == 1) %>%
  setup(
    y = "risktaking",
    x = c(
      "e_oth_drinkingwater",
      "e_exp_watersanithyg100k",
      "e_ses_gini",
      "e_ses_school",
      "e_exp_disaster",
      "e_exp_airdeath100k",
      "e_exp_watersanithyg"
    ),
    controls = c("age_scale", "COUNTRY"),
    model = "lm"
  )

# Fit all specifications on the male subset
specification_results_males <- specr(specification_males)

# Print the summary of the results
summary(specification_results_males)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    18.851 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##  median  mad    min  max  q25  q75
##    0.12 1.35 -27.52 1.46 -1.4 0.77
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  119093 119093 119093
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 e_oth_drink… risk… lm    no cova… all     riskta…     1.41      0.04     39.8 
## 2 e_oth_drink… risk… lm    age_sca… all     riskta…     0.75      0.04     21.0 
## 3 e_oth_drink… risk… lm    COUNTRY  all     riskta…    -0.79      0.36     -2.16
## 4 e_oth_drink… risk… lm    age_sca… all     riskta…    -1.31      0.36     -3.69
## 5 e_exp_water… risk… lm    no cova… all     riskta…     1.46      0.03     48.1 
## 6 e_exp_water… risk… lm    age_sca… all     riskta…     0.85      0.03     27.4 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Plots for male subset results

# Default specr plot of the male-subset results
plot(specification_results_males)

# Panel A: specification curve without confidence-interval bars, with ribbon
(a_male <- plot(specification_results_males, type = "curve", ci = FALSE, ribbon = TRUE) +
   geom_point(size = 4))

# Panel B: analytic choices behind each specification
(b_male <- plot(specification_results_males, type = "choices", choices = c("x", "y", "model", "controls")) +
   geom_point(size = 2, shape = 4))

# Panel C: sample size per specification. The upper limit is left open (NA):
# every model here uses far more than 400 observations, so a fixed
# ylim(0, 400) would censor all bars and leave the panel empty.
(c_male <- plot(specification_results_males, type = "samplesizes") + ylim(0, NA))

# Stack the three panels vertically with aligned axes
plot_grid(a_male, b_male, c_male, ncol = 1,
          align = "v",
          rel_heights = c(1.5, 2, 0.8),
          axis = "rbl")

# Distribution of the estimates as boxplots with the individual points overlaid
plot(specification_results_males, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

# Load the required packages
library(dplyr)
library(flextable)
library(officer)

# Extract the per-specification results from the male-subset specr object
results_df <- specification_results_males$data

# Summarise the estimates per predictor (x) and covariate set (controls).
# NOTE(review): with a single model and subset, each (x, controls) pair
# appears to correspond to exactly one specification (7 predictors x 4
# covariate sets = 28), so Median/Min/Max/Q25/Q75 coincide and
# Significant_Perc is either 0 or 100 - confirm this grouping is intended.
summary_table <- results_df %>%
  group_by(x, controls) %>%
  summarise(
    Median = round(median(estimate, na.rm = TRUE), 5),
    Min = round(min(estimate, na.rm = TRUE), 5),
    Max = round(max(estimate, na.rm = TRUE), 5),
    Q25 = round(quantile(estimate, 0.25, na.rm = TRUE), 5),
    Q75 = round(quantile(estimate, 0.75, na.rm = TRUE), 5),
    # Percentage of estimates with p < 0.05
    Significant_Perc = round(mean(p.value < 0.05, na.rm = TRUE) * 100, 5),
    # Return an ungrouped tibble and avoid the regrouping message
    .groups = "drop"
  ) %>%
  arrange(desc(Significant_Perc))  # sort by share of significant estimates

# Build a formatted flextable
summary_flextable <- flextable(summary_table) %>%
  theme_vanilla() %>%  # visual styling
  set_table_properties(width = 1, layout = "autofit") %>%
  align(align = "center", part = "all") %>%
  bold(part = "header") %>%
  autofit()

# Create a new Word document
doc <- read_docx()

# Add the table to the document
doc <- body_add_flextable(doc, summary_flextable)

# Save the document (path is relative to the current working directory)
print(doc, target = "Specification_Summary_e_male.docx")

Subsetting data for females

# Same specification grid as for the full sample, restricted to the rows
# with gender == 0 (females).
specification_females <- hardship_combined %>%
  filter(gender == 0) %>%
  setup(
    y = "risktaking",
    x = c(
      "e_oth_drinkingwater",
      "e_exp_watersanithyg100k",
      "e_ses_gini",
      "e_ses_school",
      "e_exp_disaster",
      "e_exp_airdeath100k",
      "e_exp_watersanithyg"
    ),
    controls = c("age_scale", "COUNTRY"),
    model = "lm"
  )

# Fit all specifications on the female subset
specification_results_females <- specr(specification_females)

# Print the summary of the results
summary(specification_results_females)
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    16.251 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##  median  mad    min  max   q25  q75
##    0.02 1.43 -45.52 1.39 -2.95 0.56
## 
## Descriptive summary of sample sizes: 
## 
##  median    min    max
##  105457 105457 105457
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 e_oth_drink… risk… lm    no cova… all     riskta…     0.96      0.04     27.0 
## 2 e_oth_drink… risk… lm    age_sca… all     riskta…     0.4       0.04     11.3 
## 3 e_oth_drink… risk… lm    COUNTRY  all     riskta…    -2.11      0.37     -5.67
## 4 e_oth_drink… risk… lm    age_sca… all     riskta…    -2.17      0.36     -5.97
## 5 e_exp_water… risk… lm    no cova… all     riskta…     1.07      0.03     33.5 
## 6 e_exp_water… risk… lm    age_sca… all     riskta…     0.53      0.03     16.5 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>

Plots for female subset results

# Default specr plot of the female-subset results
plot(specification_results_females)

# Panel A: specification curve without confidence-interval bars, with ribbon
(a_female <- plot(specification_results_females, type = "curve", ci = FALSE, ribbon = TRUE) +
   geom_point(size = 4))

# Panel B: analytic choices behind each specification
(b_female <- plot(specification_results_females, type = "choices", choices = c("x", "y", "model", "controls")) +
   geom_point(size = 2, shape = 4))

# Panel C: sample size per specification. The upper limit is left open (NA):
# every model here uses far more than 400 observations, so a fixed
# ylim(0, 400) would censor all bars and leave the panel empty.
(c_female <- plot(specification_results_females, type = "samplesizes") + ylim(0, NA))

# Stack the three panels vertically with aligned axes
plot_grid(a_female, b_female, c_female, ncol = 1,
          align = "v",
          rel_heights = c(1.5, 2, 0.8),
          axis = "rbl")

# Distribution of the estimates as boxplots with the individual points overlaid
plot(specification_results_females, type = "boxplot") +
  geom_point(alpha = .4) +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect size", fill = "")

Subsetting data for age-categories

# Run the environment-hardship specification curve analysis for one age group.
#
# Arguments:
#   data       Data frame containing `age_numeric`, `risktaking`, the `e_*`
#              predictors listed below and the controls `age_scale`/`COUNTRY`.
#   age_id     Value of `age_numeric` that selects the age group.
#   age_label  Label used in the printed heading and in the plot titles.
#
# Returns a list with
#   summary  descriptive statistics of the specification curve
#            (result of summary(..., type = "curve")),
#   plots    named list of the four plots (plot_a, plot_b, plot_c, plot_d),
#   results  the fitted specr object, for further inspection by the caller.
#
# Side effect: prints a heading and the detailed summary report once.
run_specification_for_age <- function(data, age_id, age_label) {
  # Keep only the rows of the requested age group
  data_subset <- data %>%
    filter(age_numeric == age_id)

  # Fail early with a clear message instead of an obscure model-fitting error
  if (nrow(data_subset) == 0) {
    stop("No observations found for age group ", age_id, call. = FALSE)
  }

  # Define the specification grid
  specification <- setup(
    data = data_subset,
    y = "risktaking",
    x = c("e_oth_drinkingwater",
          "e_exp_watersanithyg100k", "e_ses_gini", "e_ses_school", "e_exp_disaster",
          "e_exp_airdeath100k", "e_exp_watersanithyg"),
    controls = c("age_scale", "COUNTRY"),
    model = "lm"
  )

  # Fit all specifications
  specification_results <- specr(specification)

  # Print the report under a heading naming the age category. The default
  # summary() of a specr object prints the report itself and yields NULL, so
  # it is called exactly once for its side effect: wrapping it in print()
  # emits a stray "NULL", and calling it again reprints the whole report.
  cat("\nStatistische Ergebnisse für die Alterskategorie:", age_label, "\n")
  summary(specification_results, digits = 5)

  # Storable summary of the estimate distribution for the caller
  curve_summary <- summary(specification_results, type = "curve")

  # Build the plots for this age group
  plot_list <- list(
    plot_a = plot(specification_results, type = "curve", ci = FALSE, ribbon = TRUE) +
             geom_point(size = 4) + ggtitle(paste("Curve Plot -", age_label)),
    plot_b = plot(specification_results, type = "choices", choices = c("x", "y", "model", "controls")) +
             geom_point(size = 2, shape = 4) + ggtitle(paste("Choices Plot -", age_label)),
    # Upper limit left open (NA): the sample sizes are far above 400, so a
    # fixed ylim(0, 400) would censor every bar and leave the panel empty.
    plot_c = plot(specification_results, type = "samplesizes") + ylim(0, NA) +
             ggtitle(paste("Sample Sizes Plot -", age_label)),
    plot_d = plot(specification_results, type = "boxplot") +
             geom_point(alpha = .4) + scale_fill_brewer(palette = "Pastel2") +
             labs(x = "Effect size", fill = "") + ggtitle(paste("Boxplot -", age_label))
  )

  list(summary = curve_summary, plots = plot_list, results = specification_results)
}

# Run the analysis for each of the four age groups and display the results.
# run_specification_for_age() already prints the detailed summary itself; its
# `summary` element is NULL whenever it was filled from the default
# summary() call (which only prints), so it is printed only when it actually
# holds something.
for (i in seq_len(4)) {
  results <- run_specification_for_age(hardship_combined, i, paste("Age Group", i))
  if (!is.null(results$summary)) {
    print(results$summary)
  }
  # Show the plots in list order: plot_a, plot_b, plot_c, plot_d
  for (age_plot in results$plots) {
    print(age_plot)
  }
}
## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 1 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    6.981 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##    median     mad       min     max      q25      q75
##  -0.18815 0.56552 -47.86192 1.45741 -3.16806 -0.04823
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   45989 45989 45989
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 e_oth_drink… risk… lm    no cova… all     riskta…  -0.0859    0.0456    -1.88 
## 2 e_oth_drink… risk… lm    age_sca… all     riskta…  -0.0860    0.0456    -1.89 
## 3 e_oth_drink… risk… lm    COUNTRY  all     riskta…  -2.29      0.402     -5.68 
## 4 e_oth_drink… risk… lm    age_sca… all     riskta…  -2.28      0.402     -5.67 
## 5 e_exp_water… risk… lm    no cova… all     riskta…  -0.0426    0.0466    -0.916
## 6 e_exp_water… risk… lm    age_sca… all     riskta…  -0.043     0.0465    -0.924
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    6.981 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##    median     mad       min     max      q25      q75
##  -0.18815 0.56552 -47.86192 1.45741 -3.16806 -0.04823
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   45989 45989 45989
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 e_oth_drink… risk… lm    no cova… all     riskta…  -0.0859    0.0456    -1.88 
## 2 e_oth_drink… risk… lm    age_sca… all     riskta…  -0.0860    0.0456    -1.89 
## 3 e_oth_drink… risk… lm    COUNTRY  all     riskta…  -2.29      0.402     -5.68 
## 4 e_oth_drink… risk… lm    age_sca… all     riskta…  -2.28      0.402     -5.67 
## 5 e_exp_water… risk… lm    no cova… all     riskta…  -0.0426    0.0466    -0.916
## 6 e_exp_water… risk… lm    age_sca… all     riskta…  -0.043     0.0465    -0.924
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 2 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    13.918 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad       min     max    q25     q75
##  0.06404 1.21845 -32.98014 1.00425 -2.117 0.76737
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   92700 92700 92700
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 e_oth_drink… risk… lm    no cova… all     riskta…    0.891    0.0361     24.7 
## 2 e_oth_drink… risk… lm    age_sca… all     riskta…    0.764    0.0363     21.0 
## 3 e_oth_drink… risk… lm    COUNTRY  all     riskta…   -1.58     0.384      -4.10
## 4 e_oth_drink… risk… lm    age_sca… all     riskta…   -1.51     0.383      -3.94
## 5 e_exp_water… risk… lm    no cova… all     riskta…    0.969    0.0340     28.5 
## 6 e_exp_water… risk… lm    age_sca… all     riskta…    0.864    0.0341     25.4 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    13.918 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad       min     max    q25     q75
##  0.06404 1.21845 -32.98014 1.00425 -2.117 0.76737
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   92700 92700 92700
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 e_oth_drink… risk… lm    no cova… all     riskta…    0.891    0.0361     24.7 
## 2 e_oth_drink… risk… lm    age_sca… all     riskta…    0.764    0.0363     21.0 
## 3 e_oth_drink… risk… lm    COUNTRY  all     riskta…   -1.58     0.384      -4.10
## 4 e_oth_drink… risk… lm    age_sca… all     riskta…   -1.51     0.383      -3.94
## 5 e_exp_water… risk… lm    no cova… all     riskta…    0.969    0.0340     28.5 
## 6 e_exp_water… risk… lm    age_sca… all     riskta…    0.864    0.0341     25.4 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 3 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    9.939 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad      min     max     q25     q75
##  0.64725 0.73654 -0.17899 4.54656 0.10487 1.03582
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   62865 62865 62865
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 e_oth_drink… risk… lm    no cova… all     riskta…   1.24      0.0631    19.6  
## 2 e_oth_drink… risk… lm    age_sca… all     riskta…   1.16      0.0630    18.4  
## 3 e_oth_drink… risk… lm    COUNTRY  all     riskta…   0.217     0.797      0.272
## 4 e_oth_drink… risk… lm    age_sca… all     riskta…   0.0812    0.795      0.102
## 5 e_exp_water… risk… lm    no cova… all     riskta…   1.08      0.0447    24.2  
## 6 e_exp_water… risk… lm    age_sca… all     riskta…   1.02      0.0447    22.8  
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    9.939 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad      min     max     q25     q75
##  0.64725 0.73654 -0.17899 4.54656 0.10487 1.03582
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   62865 62865 62865
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 e_oth_drink… risk… lm    no cova… all     riskta…   1.24      0.0631    19.6  
## 2 e_oth_drink… risk… lm    age_sca… all     riskta…   1.16      0.0630    18.4  
## 3 e_oth_drink… risk… lm    COUNTRY  all     riskta…   0.217     0.797      0.272
## 4 e_oth_drink… risk… lm    age_sca… all     riskta…   0.0812    0.795      0.102
## 5 e_exp_water… risk… lm    no cova… all     riskta…   1.08      0.0447    24.2  
## 6 e_exp_water… risk… lm    age_sca… all     riskta…   1.02      0.0447    22.8  
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL

## 
## Statistische Ergebnisse für die Alterskategorie: Age Group 4 
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    3.673 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad      min      max     q25    q75
##  1.26316 1.55782 -2.56259 84.15645 0.70693 5.4664
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   22996 22996 22996
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 e_oth_drink… risk… lm    no cova… all     riskta…     2.18    0.133      16.4 
## 2 e_oth_drink… risk… lm    age_sca… all     riskta…     2.15    0.133      16.2 
## 3 e_oth_drink… risk… lm    COUNTRY  all     riskta…     3.91    2.16        1.81
## 4 e_oth_drink… risk… lm    age_sca… all     riskta…     4.02    2.16        1.86
## 5 e_exp_water… risk… lm    no cova… all     riskta…     1.37    0.0775     17.7 
## 6 e_exp_water… risk… lm    age_sca… all     riskta…     1.36    0.0774     17.5 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL
## Results of the specification curve analysis
## -------------------
## Technical details:
## 
##   Class:                          specr.object -- version: 1.0.1 
##   Cores used:                     1 
##   Duration of fitting process:    3.673 sec elapsed 
##   Number of specifications:       28 
## 
## Descriptive summary of the specification curve:
## 
##   median     mad      min      max     q25    q75
##  1.26316 1.55782 -2.56259 84.15645 0.70693 5.4664
## 
## Descriptive summary of sample sizes: 
## 
##  median   min   max
##   22996 22996 22996
## 
## Head of the specification results (first 6 rows): 
## 
## # A tibble: 6 × 24
##   x            y     model controls subsets formula estimate std.error statistic
##   <chr>        <chr> <chr> <chr>    <chr>   <glue>     <dbl>     <dbl>     <dbl>
## 1 e_oth_drink… risk… lm    no cova… all     riskta…     2.18    0.133      16.4 
## 2 e_oth_drink… risk… lm    age_sca… all     riskta…     2.15    0.133      16.2 
## 3 e_oth_drink… risk… lm    COUNTRY  all     riskta…     3.91    2.16        1.81
## 4 e_oth_drink… risk… lm    age_sca… all     riskta…     4.02    2.16        1.86
## 5 e_exp_water… risk… lm    no cova… all     riskta…     1.37    0.0775     17.7 
## 6 e_exp_water… risk… lm    age_sca… all     riskta…     1.36    0.0774     17.5 
## # ℹ 15 more variables: p.value <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   fit_r.squared <dbl>, fit_adj.r.squared <dbl>, fit_sigma <dbl>,
## #   fit_statistic <dbl>, fit_p.value <dbl>, fit_df <dbl>, fit_logLik <dbl>,
## #   fit_AIC <dbl>, fit_BIC <dbl>, fit_deviance <dbl>, fit_df.residual <dbl>,
## #   fit_nobs <dbl>
## NULL